def test_hdbscan_feature_vector():
    """Cluster the raw feature matrix and check the cluster count and validity.

    Both the functional ``hdbscan`` entry point and the ``HDBSCAN`` estimator
    are run on the module-level ``X``; each must yield ``n_clusters`` distinct
    labels once the ``-1`` label is excluded. The estimator's labelling must
    also reach a validity index of at least 0.4.
    """
    # Functional API: six values are returned, only the labels are checked.
    func_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(X)
    func_clusters = set(func_labels)
    func_clusters.discard(-1)  # -1 is not counted as a cluster
    assert len(func_clusters) == n_clusters

    # Estimator API, default parameters.
    est_labels = HDBSCAN().fit(X).labels_
    est_clusters = set(est_labels)
    est_clusters.discard(-1)
    assert len(est_clusters) == n_clusters

    score = validity_index(X, est_labels)
    assert score >= 0.4
def test_hdbscan_feature_vector():
    """Check cluster count and validity when clustering the feature matrix.

    Runs the functional ``hdbscan`` entry point and the ``HDBSCAN`` estimator
    on the module-level ``X`` and requires each to produce ``n_clusters``
    distinct labels, not counting the ``-1`` label. The estimator's labels
    must additionally score a validity index of at least 0.4.

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_equal`` / ``assert_greater_equal`` helpers so the test relies
    only on pytest's own assertion reporting.
    """
    # NOTE(review): this file defines a test with this name more than once;
    # the later definition shadows the earlier one at import time.
    labels, p, persist, ctree, ltree, mtree = hdbscan(X)
    # Number of distinct labels, minus one if the -1 label is present.
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert n_clusters_1 == n_clusters

    labels = HDBSCAN().fit(X).labels_
    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert n_clusters_2 == n_clusters

    validity = validity_index(X, labels)
    assert validity >= 0.4
def test_hdbscan_distance_matrix():
    """Cluster a precomputed distance matrix and check count and validity.

    A square pairwise-distance matrix is built from the module-level ``X``
    and divided by its maximum entry. Both the functional ``hdbscan`` entry
    point and the ``HDBSCAN`` estimator are run with ``metric="precomputed"``
    and must each yield ``n_clusters`` distinct labels once the ``-1`` label
    is excluded. The estimator's labelling must reach a validity index of at
    least 0.6.
    """
    condensed = distance.pdist(X)
    dist_matrix = distance.squareform(condensed)
    dist_matrix /= np.max(dist_matrix)  # scale in place by the largest entry

    # Functional API: six values are returned, only the labels are checked.
    func_labels, _p, _persist, _ctree, _ltree, _mtree = hdbscan(
        dist_matrix, metric="precomputed"
    )
    func_clusters = set(func_labels)
    func_clusters.discard(-1)  # ignore noise if present
    assert len(func_clusters) == n_clusters

    # Estimator API on the same matrix.
    est_labels = HDBSCAN(metric="precomputed").fit(dist_matrix).labels_
    est_clusters = set(est_labels)
    est_clusters.discard(-1)
    assert len(est_clusters) == n_clusters

    score = validity_index(dist_matrix, est_labels, metric="precomputed", d=2)
    assert score >= 0.6
def test_hdbscan_distance_matrix():
    """Check cluster count and validity when clustering a distance matrix.

    Builds a square pairwise-distance matrix from the module-level ``X``,
    divides it by its maximum entry, and runs the functional ``hdbscan``
    entry point and the ``HDBSCAN`` estimator with ``metric="precomputed"``.
    Each must produce ``n_clusters`` distinct labels, not counting the ``-1``
    label, and the estimator's labels must score a validity index of at
    least 0.6.

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_equal`` / ``assert_greater_equal`` helpers so the test relies
    only on pytest's own assertion reporting.
    """
    # NOTE(review): this file defines a test with this name more than once;
    # the later definition shadows the earlier one at import time.
    D = distance.squareform(distance.pdist(X))
    D /= np.max(D)  # scale in place by the largest entry

    labels, p, persist, ctree, ltree, mtree = hdbscan(D, metric="precomputed")
    # number of clusters, ignoring noise if present
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert n_clusters_1 == n_clusters

    labels = HDBSCAN(metric="precomputed").fit(D).labels_
    n_clusters_2 = len(set(labels)) - int(-1 in labels)
    assert n_clusters_2 == n_clusters

    validity = validity_index(D, labels, metric="precomputed", d=2)
    assert validity >= 0.6