def test_auto_extract_hier():
    """Check that 'auto' extraction yields six distinct labels.

    Six 2-D Gaussian blobs of 250 points each are drawn from the globally
    seeded NumPy generator, OPTICS is fitted on the stacked data, and the
    number of distinct values in ``labels_`` after an 'auto' extraction is
    compared against 6.
    """
    np.random.seed(0)
    blob_size = 250

    # (center, spread) of every blob, listed in the order the random
    # samples must be drawn so the generated data stays reproducible.
    blob_specs = (
        ([-5, -2], .8),
        ([4, -1], .1),
        ([1, -2], .2),
        ([-2, 3], .3),
        ([3, -2], 1.6),
        ([5, 6], 2),
    )

    X = np.empty((0, 2))
    for center, spread in blob_specs:
        blob = center + spread * np.random.randn(blob_size, 2)
        X = np.r_[X, blob]

    # Build the estimator and fit it on the sample data.
    clust = OPTICS(eps=30.3, min_samples=9)
    clust.fit(X)

    # The eps argument is not used for 'auto' extraction, hence 0.0.
    clust.extract(0.0, 'auto')

    n_distinct_labels = len(set(clust.labels_))
    assert_equal(n_distinct_labels, 6)
def test_filter():
    """Exercise ``OPTICS.filter`` and compare it with a DBSCAN-style extract.

    First checks that the boolean-mask and index forms of ``filter`` select
    the same number of points, then checks that the core/not-core
    classification from ``filter`` agrees with the one produced by
    ``fit`` + ``extract`` on at least 95% of the samples.
    """
    X = generate_clustered_data(n_clusters=3)

    # Parameters chosen specifically for this task.
    clust = OPTICS(eps=6.0, min_samples=4, metric='euclidean')

    # filter() is deliberately run before OPTICS itself is computed.
    core_mask = clust.filter(X, 0.5)
    core_indices = clust.filter(X, 0.5, index_type='idx')

    # The 'bool' and 'idx' outputs must describe the same number of points.
    n_flagged = sum(core_mask)
    assert_equal(n_flagged, len(core_indices))

    # Now compute OPTICS and extract a DBSCAN-style clustering.
    clust.fit(X)
    clust.extract(0.5, clustering='dbscan')

    # Core points from filter and extract should match to within one point;
    # extract occasionally underestimates because of the start point of the
    # OPTICS algorithm, so only 95% agreement is required.
    n_matching = sum(clust._is_core == core_mask)
    agreement_ratio = float(n_matching) / len(X)
    assert_greater_equal(agreement_ratio, 0.95)
plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14, alpha=0.5) xy = X[class_member_mask & ~core_samples_mask] plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=2, alpha=0.5) plt.title("Automatic Clustering \n Estimated number of clusters: %d" % clust.n_clusters) # (Re)-extract clustering structure, using a single eps to show comparison # with DBSCAN. This can be run for any clustering distance, and can be run # multiple times without rerunning OPTICS. OPTICS does need to be re-run to # change the min-pts parameter. clust.extract(.15, 'dbscan') core_samples_mask = np.zeros_like(clust.labels_, dtype=bool) core_samples_mask[clust.core_sample_indices_] = True # Black removed and is used for noise instead. unique_labels = set(clust.labels_) colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels))) plt.subplot(223) for k, col in zip(unique_labels, colors): if k == -1: # Black used for noise. col = 'k'
def test_empty_extract():
    """Check that ``extract`` returns None when ``fit`` has not been run."""
    unfitted = OPTICS(eps=0.3, min_samples=10)
    outcome = unfitted.extract(0.01, clustering='auto')
    assert outcome is None