def test_rsl_feature_vector():
    """Run robust single linkage on the feature matrix ``X`` via both APIs.

    The cluster-count comparison is commented out in this test, so it only
    verifies that the function and the estimator run without raising.
    """
    # Functional interface.
    func_labels, _tree = robust_single_linkage(X, 0.2)
    func_unique = set(func_labels)
    # The -1 label marks noise and is not counted as a cluster.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    # assert_equal(n_clusters_1, n_clusters)

    # Estimator interface with default parameters.
    est_labels = RobustSingleLinkage().fit(X).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
def test_rsl_prims_kdtree():
    """Check that ``algorithm='prims_kdtree'`` finds ``n_clusters`` clusters.

    Both the ``robust_single_linkage`` function and the
    ``RobustSingleLinkage`` estimator are exercised on ``X``.
    """
    algo = 'prims_kdtree'

    # Functional interface.
    func_labels, _tree = robust_single_linkage(X, 0.4, algorithm=algo)
    func_unique = set(func_labels)
    # The -1 label marks noise and is not counted as a cluster.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    assert_equal(n_clusters_1, n_clusters)

    # Estimator interface.
    est_labels = RobustSingleLinkage(algorithm=algo).fit(X).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
    assert_equal(n_clusters_2, n_clusters)
def test_rsl_boruvka_balltree():
    """Check that ``algorithm='boruvka_balltree'`` finds ``n_clusters`` clusters.

    Both the ``robust_single_linkage`` function and the
    ``RobustSingleLinkage`` estimator are exercised on ``X`` with 0.45.
    """
    algo = 'boruvka_balltree'

    # Functional interface.
    func_labels, _tree = robust_single_linkage(X, 0.45, algorithm=algo)
    func_unique = set(func_labels)
    # The -1 label marks noise and is not counted as a cluster.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    assert_equal(n_clusters_1, n_clusters)

    # Estimator interface.
    estimator = RobustSingleLinkage(cut=0.45, algorithm=algo)
    est_labels = estimator.fit(X).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
    assert_equal(n_clusters_2, n_clusters)
def test_rsl_callable_metric():
    """Run robust single linkage with a callable passed as ``metric``.

    The cluster-count comparison is commented out in this test, so it only
    verifies that both entry points accept a callable metric without raising.
    """
    # Pass the function object itself rather than its string name.
    euclidean = distance.euclidean

    # Functional interface.
    func_labels, _tree = robust_single_linkage(X, 0.2, metric=euclidean)
    func_unique = set(func_labels)
    # The -1 label marks noise and is not counted as a cluster.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    # assert_equal(n_clusters_1, n_clusters)

    # Estimator interface.
    est_labels = RobustSingleLinkage(metric=euclidean).fit(X).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
def test_rsl_distance_matrix():
    """Run robust single linkage on a precomputed distance matrix.

    The pairwise distances of ``X`` are scaled so the largest entry is 1.
    The cluster-count comparison is commented out in this test, so it only
    verifies that both entry points accept ``metric='precomputed'``.
    """
    # Square pairwise distance matrix, scaled by its maximum.
    condensed = distance.pdist(X)
    dist_matrix = distance.squareform(condensed)
    dist_matrix /= np.max(dist_matrix)

    # Functional interface.
    func_labels, _tree = robust_single_linkage(
        dist_matrix, 0.25, metric='precomputed')
    func_unique = set(func_labels)
    # Number of clusters, not counting the noise label (-1) when present.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    # assert_equal(n_clusters_1, n_clusters)

    # Estimator interface.
    estimator = RobustSingleLinkage(metric="precomputed")
    est_labels = estimator.fit(dist_matrix).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
def test_rsl_high_dimensional():
    """Check clustering of 64-feature blob data against ``n_clusters``.

    Fifty standardised samples are clustered with 5.5 through both the
    ``robust_single_linkage`` function and the ``RobustSingleLinkage``
    estimator (the latter using ``'seuclidean'`` with unit ``V``).
    """
    # The generated blob labels are not needed by this test.
    blobs, _ = make_blobs(n_samples=50, n_features=64, random_state=0)
    blobs = StandardScaler().fit_transform(blobs)

    # Functional interface.
    func_labels, _tree = robust_single_linkage(blobs, 5.5)
    func_unique = set(func_labels)
    # The -1 label marks noise and is not counted as a cluster.
    n_clusters_1 = len(func_unique) - int(-1 in func_labels)
    assert_equal(n_clusters_1, n_clusters)

    # Estimator interface.
    estimator = RobustSingleLinkage(
        cut=5.5,
        algorithm='best',
        metric='seuclidean',
        V=np.ones(blobs.shape[1]),
    )
    est_labels = estimator.fit(blobs).labels_
    est_unique = set(est_labels)
    n_clusters_2 = len(est_unique) - int(-1 in est_labels)
    assert_equal(n_clusters_2, n_clusters)
def test_rsl_input_lists():
    """Fitting on a plain nested list of floats must not raise."""
    points = [[1., 2.], [3., 4.]]
    estimator = RobustSingleLinkage()
    estimator.fit(points)  # succeeds simply by not raising
def test_rsl_hierarchy():
    """After fitting on ``X`` the estimator exposes a cluster hierarchy."""
    fitted = RobustSingleLinkage().fit(X)
    hierarchy = fitted.cluster_hierarchy_
    assert hierarchy is not None
def test_rsl_unavailable_hierarchy():
    """Reading ``cluster_hierarchy_`` before ``fit`` warns and yields ``None``."""
    clusterer = RobustSingleLinkage()
    with warnings.catch_warnings(record=True) as caught:
        # Force every warning to be recorded; otherwise the result depends on
        # the warning filters in effect when the test runs.
        warnings.simplefilter("always")
        tree = clusterer.cluster_hierarchy_

    # The recorded list stays available after the context manager exits.
    assert len(caught) > 0
    assert tree is None