コード例 #1
0
ファイル: test_hdbscan.py プロジェクト: Pandinosaurus/hdbscan
def test_hdbscan_feature_vector():
    """Check both clustering entry points on the raw feature matrix ``X``.

    The functional ``hdbscan`` call and the ``HDBSCAN`` estimator must each
    produce ``n_clusters`` distinct labels, where the label ``-1`` is not
    counted as a cluster. The estimator's labelling must also reach a
    validity index of at least 0.4.
    """
    # Functional API; the six-way unpack also fails if the result has a
    # different number of items.
    func_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(X)
    skip_minus_one = 1 if -1 in func_labels else 0
    func_cluster_count = len(set(func_labels)) - skip_minus_one
    assert func_cluster_count == n_clusters

    # Estimator API on the same data.
    model = HDBSCAN().fit(X)
    est_labels = model.labels_
    skip_minus_one = 1 if -1 in est_labels else 0
    est_cluster_count = len(set(est_labels)) - skip_minus_one
    assert est_cluster_count == n_clusters

    # The validity score is computed on the estimator's labels.
    score = validity_index(X, est_labels)
    assert score >= 0.4
コード例 #2
0
def test_hdbscan_feature_vector():
    """Verify cluster counts and validity when clustering ``X`` directly.

    Both the ``hdbscan`` function and the ``HDBSCAN`` estimator are expected
    to yield ``n_clusters`` clusters once the label ``-1`` is left out of the
    count; the estimator's labels must additionally score a validity index
    of 0.4 or more.
    """
    # Function-style call: only the labels are inspected, but all six
    # returned items are unpacked so a changed return arity is caught.
    labels_fn, _p, _persist, _ctree, _ltree, _mtree = hdbscan(X)
    count_fn = len(set(labels_fn))
    if -1 in labels_fn:
        count_fn -= 1  # -1 is not a cluster of its own
    assert_equal(count_fn, n_clusters)

    # Estimator-style call.
    labels_est = HDBSCAN().fit(X).labels_
    count_est = len(set(labels_est))
    if -1 in labels_est:
        count_est -= 1
    assert_equal(count_est, n_clusters)

    quality = validity_index(X, labels_est)
    assert_greater_equal(quality, 0.4)
コード例 #3
0
ファイル: test_hdbscan.py プロジェクト: CyberSift/hdbscan
def test_hdbscan_feature_vector():
    """Exercise ``hdbscan`` and ``HDBSCAN`` on the feature matrix ``X``.

    Each entry point must report ``n_clusters`` clusters, counting distinct
    labels and excluding ``-1``. The validity index of the estimator's
    labelling is required to be at least 0.4.
    """
    # Functional interface (six return values; only the first is checked).
    result_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(X)
    has_minus_one = -1 in result_labels
    distinct_labels = set(result_labels)
    assert_equal(len(distinct_labels) - int(has_minus_one), n_clusters)

    # Estimator interface.
    fitted = HDBSCAN().fit(X)
    result_labels = fitted.labels_
    has_minus_one = -1 in result_labels
    distinct_labels = set(result_labels)
    assert_equal(len(distinct_labels) - int(has_minus_one), n_clusters)

    # Scored on the estimator's labels, the last value bound above.
    validity_score = validity_index(X, result_labels)
    assert_greater_equal(validity_score, 0.4)
コード例 #4
0
ファイル: test_hdbscan.py プロジェクト: Pandinosaurus/hdbscan
def test_hdbscan_distance_matrix():
    """Check both clustering entry points on a precomputed distance matrix.

    A square pairwise-distance matrix is built from ``X`` and scaled so its
    largest entry is 1. The ``hdbscan`` function and the ``HDBSCAN``
    estimator, both run with ``metric="precomputed"``, must each find
    ``n_clusters`` clusters (noise, labelled ``-1``, is ignored), and the
    estimator's labelling must reach a validity index of at least 0.6.
    """
    condensed = distance.pdist(X)
    dist_matrix = distance.squareform(condensed)
    dist_matrix /= np.max(dist_matrix)  # in-place scale by the largest distance

    # Functional API; the six-way unpack also fails on a different arity.
    func_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(
        dist_matrix, metric="precomputed"
    )
    # Count distinct labels, dropping one if the noise label is present.
    noise_offset = 1 if -1 in func_labels else 0
    func_cluster_count = len(set(func_labels)) - noise_offset
    assert func_cluster_count == n_clusters

    # Estimator API on the same matrix.
    est_labels = HDBSCAN(metric="precomputed").fit(dist_matrix).labels_
    noise_offset = 1 if -1 in est_labels else 0
    est_cluster_count = len(set(est_labels)) - noise_offset
    assert est_cluster_count == n_clusters

    # NOTE(review): d=2 is presumably the dimensionality of the original
    # data, which the matrix alone cannot convey — confirm against the
    # validity_index documentation.
    score = validity_index(dist_matrix, est_labels, metric="precomputed", d=2)
    assert score >= 0.6
コード例 #5
0
ファイル: test_hdbscan.py プロジェクト: CyberSift/hdbscan
def test_hdbscan_distance_matrix():
    """Verify clustering from a precomputed, max-normalised distance matrix.

    Pairwise distances of ``X`` are expanded to square form and divided by
    their maximum. Running ``hdbscan`` and ``HDBSCAN`` with the
    ``precomputed`` metric must each give ``n_clusters`` clusters when the
    noise label ``-1`` is not counted, and the estimator's labels must score
    a validity index of 0.6 or more.
    """
    metric_name = "precomputed"

    square = distance.squareform(distance.pdist(X))
    peak = np.max(square)
    square /= peak  # in-place, so the largest entry becomes 1

    # Function-style call; all six returned items are unpacked even though
    # only the labels are inspected.
    labels_fn, _p, _persist, _ctree, _ltree, _mtree = hdbscan(
        square, metric=metric_name
    )
    # Number of clusters, leaving noise out if it is present.
    count_fn = len(set(labels_fn))
    if -1 in labels_fn:
        count_fn -= 1
    assert_equal(count_fn, n_clusters)

    # Estimator-style call.
    estimator = HDBSCAN(metric=metric_name)
    labels_est = estimator.fit(square).labels_
    count_est = len(set(labels_est))
    if -1 in labels_est:
        count_est -= 1
    assert_equal(count_est, n_clusters)

    # NOTE(review): d=2 presumably gives the feature dimension of the
    # original data for the precomputed case — confirm.
    quality = validity_index(square, labels_est, metric=metric_name, d=2)
    assert_greater_equal(quality, 0.6)