Ejemplo n.º 1
0
    def test_sparse(self):
        """ Checks that DBSCAN yields identical labels for dense and sparse
        (CSR) inputs that hold the same samples.
        """
        # Blobs are skewed by a fixed linear map and then passed through
        # StandardScaler before being wrapped in ds-arrays.
        samples, _ = make_blobs(n_samples=1500, random_state=170)
        skew = [[0.6, -0.6], [-0.4, 0.8]]
        samples = StandardScaler().fit_transform(samples.dot(skew))

        block_shape = (300, 2)
        dense_arr = ds.array(samples, block_size=block_shape)
        sparse_arr = ds.array(csr_matrix(samples), block_size=block_shape)

        # The same estimator instance is fitted on both representations.
        estimator = DBSCAN(n_regions=1, eps=.15)
        labels_dense = estimator.fit_predict(dense_arr).collect()
        labels_sparse = estimator.fit_predict(sparse_arr).collect()

        labels_match = np.array_equal(labels_dense, labels_sparse)
        self.assertTrue(labels_match)
Ejemplo n.º 2
0
    def test_random_clusters_2(self):
        """ Runs DBSCAN on seeded uniform random data with two columns and
        checks the number of clusters and of samples labelled -1.
        """
        # Fixed seed: the expected values below are pinned to this data.
        np.random.seed(2)
        points = np.random.uniform(0, 10, size=(1000, 2))
        data = ds.array(points, block_size=(300, 2))

        params = dict(n_regions=10, max_samples=10, eps=0.5, min_samples=10)
        model = DBSCAN(**params)
        labels = model.fit_predict(data).collect()

        self.assertEqual(model.n_clusters, 27)

        # Label -1 is presumably the noise label -- confirm against the
        # estimator's documentation.
        unassigned = np.count_nonzero(labels == -1)
        self.assertEqual(unassigned, 206)
Ejemplo n.º 3
0
    def test_random_clusters_3(self):
        """ Runs DBSCAN on seeded uniform random data with three columns and
        checks the number of clusters and of samples labelled -1.
        """
        # Fixed seed: the expected values below are pinned to this data.
        np.random.seed(3)
        points = np.random.uniform(0, 10, size=(1000, 3))
        data = ds.array(points, block_size=(300, 3))

        # The data has three columns, while the estimator receives
        # dimensions=[0, 1].
        params = dict(n_regions=10, dimensions=[0, 1], eps=0.9, min_samples=4)
        model = DBSCAN(**params)
        labels = model.fit_predict(data).collect()

        self.assertEqual(model.n_clusters, 50)

        # Label -1 is presumably the noise label -- confirm against the
        # estimator's documentation.
        unassigned = np.count_nonzero(labels == -1)
        self.assertEqual(unassigned, 266)
Ejemplo n.º 4
0
    def test_n_clusters_aniso_dimensions(self):
        """ Checks the cluster count and the cluster sizes found by DBSCAN on
        skewed blobs when the ``dimensions`` argument is given.
        """
        # Blobs are skewed by a fixed linear map and then passed through
        # StandardScaler before being wrapped in a ds-array.
        samples, _ = make_blobs(n_samples=1500, random_state=170)
        skew = [[0.6, -0.6], [-0.4, 0.8]]
        samples = StandardScaler().fit_transform(samples.dot(skew))
        data = ds.array(samples, block_size=(300, 2))

        model = DBSCAN(n_regions=5, dimensions=[1], eps=.15)
        labels = model.fit_predict(data).collect()

        # Sizes are compared as sets, so the check does not depend on which
        # label id each group of samples received.
        expected_sizes = {19, 496, 491, 488, 6}
        found_sizes = {labels[labels == k].size for k in range(-1, 4)}

        self.assertEqual(model.n_clusters, 4)
        self.assertEqual(expected_sizes, found_sizes)