예제 #1
0
def test_auto_extract_hier():
    # Check that 'auto' extraction reports six distinct labels on a
    # synthetic data set made of six Gaussian blobs.

    # Fixed seed so the sampled points are reproducible.
    np.random.seed(0)
    n_points_per_cluster = 250

    # (centre, spread) of each blob. The order matters: the random draws
    # below are consumed in exactly this sequence.
    blob_specs = [
        ([-5, -2], .8),
        ([4, -1], .1),
        ([1, -2], .2),
        ([-2, 3], .3),
        ([3, -2], 1.6),
        ([5, 6], 2),
    ]
    blobs = [
        centre + spread * np.random.randn(n_points_per_cluster, 2)
        for centre, spread in blob_specs
    ]
    X = np.vstack(blobs)

    # Build the estimator and compute the OPTICS ordering.
    clust = OPTICS(eps=30.3, min_samples=9)
    clust.fit(X)

    # The eps argument is not used by the 'auto' extraction, hence 0.0.
    clust.extract(0.0, 'auto')

    n_distinct_labels = len(set(clust.labels_))
    assert_equal(n_distinct_labels, 6)
예제 #2
0
def test_filter():
    # Exercise OPTICS.filter and compare its output with the core-point
    # classification obtained after fit() + extract().

    n_clusters = 3
    data = generate_clustered_data(n_clusters=n_clusters)

    # Hand-picked parameters for this particular data set.
    optics = OPTICS(eps=6.0, min_samples=4, metric='euclidean')

    # filter() is called on the raw data, before OPTICS has been computed.
    mask_members = optics.filter(data, 0.5)
    index_members = optics.filter(data, 0.5, index_type='idx')

    # The boolean form and the index form must select the same number
    # of points.
    n_selected = sum(mask_members)
    assert_equal(n_selected, len(index_members))

    # Now compute OPTICS and extract a DBSCAN-style clustering.
    optics.fit(data)
    optics.extract(0.5, clustering='dbscan')

    # Core points from filter and from extract should match to within one
    # point; extract occasionally underestimates because of the start point
    # of the OPTICS algorithm. Require at least 95% agreement on the
    # core / not-core classification.
    n_matching = sum(optics._is_core == mask_members)
    agreement_ratio = float(n_matching) / len(data)
    assert_greater_equal(agreement_ratio, 0.95)
예제 #3
0
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14, alpha=0.5)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=2, alpha=0.5)

# Title the current plot with the number of clusters reported by the
# estimator.
plt.title("Automatic Clustering \n Estimated number of clusters: %d"
          % clust.n_clusters)

# (Re)-extract the clustering structure, using a single eps to show a
# comparison with DBSCAN. This can be run for any clustering distance, and
# can be run multiple times without rerunning OPTICS. OPTICS does need to be
# re-run to change the min-pts parameter.

clust.extract(.15, 'dbscan')

# Boolean mask with the same shape as the labels, True at the indices the
# estimator lists in core_sample_indices_.
core_samples_mask = np.zeros_like(clust.labels_, dtype=bool)
core_samples_mask[clust.core_sample_indices_] = True

# Black is removed from the colour set and is used for noise instead.
# One colour per distinct label, sampled evenly from the Spectral colormap.
unique_labels = set(clust.labels_)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

# Draw into subplot position 3 of a 2x2 grid.
plt.subplot(223)

# Pair every label with its colour; the label -1 is overridden to black.
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'
예제 #4
0
def test_empty_extract():
    # Calling extract() on an estimator that was never fitted should
    # return None.

    unfitted = OPTICS(eps=0.3, min_samples=10)
    outcome = unfitted.extract(0.01, clustering='auto')
    assert outcome is None