def test_affinity_passed_to_fix_connectivity(): # Test that the affinity parameter is actually passed to the pairwise # function size = 2 rng = np.random.RandomState(0) X = rng.randn(size, size) mask = np.array([True, False, False, True]) connectivity = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray) class FakeAffinity: def __init__(self): self.counter = 0 def increment(self, *args, **kwargs): self.counter += 1 return self.counter fa = FakeAffinity() linkage_tree(X, connectivity=connectivity, affinity=fa.increment) assert fa.counter == 3
def test_affinity_passed_to_fix_connectivity(): # Test that the affinity parameter is actually passed to the pairwise # function size = 2 rng = np.random.RandomState(0) X = rng.randn(size, size) mask = np.array([True, False, False, True]) connectivity = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray) class FakeAffinity: def __init__(self): self.counter = 0 def increment(self, *args, **kwargs): self.counter += 1 return self.counter fa = FakeAffinity() linkage_tree(X, connectivity=connectivity, affinity=fa.increment) assert_equal(fa.counter, 3)
def test_linkage_misc(): # Misc tests on linkage X = np.ones((5, 5)) assert_raises(ValueError, AgglomerativeClustering(linkage='foobar').fit, X) assert_raises(ValueError, linkage_tree, X, linkage='foobar') assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4))) # Smoke test FeatureAgglomeration FeatureAgglomeration().fit(X) with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", DeprecationWarning) # Use the copy argument, to raise a warning Ward(copy=True).fit(X) # We should be getting 2 warnings: one for using Ward that is # deprecated, one for using the copy argument assert_equal(len(warning_list), 2) # test hiearchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0]) # test hiearchical clustering on a precomputed distances matrix res = linkage_tree(X, affinity=manhattan_distances) assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_linkage_misc(): # Misc tests on linkage X = np.ones((5, 5)) assert_raises(ValueError, AgglomerativeClustering(linkage='foobar').fit, X) assert_raises(ValueError, linkage_tree, X, linkage='foobar') assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4))) # Smoke test FeatureAgglomeration FeatureAgglomeration().fit(X) with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", UserWarning) # Use the copy argument, to raise a warning Ward(copy=True).fit(X) # We should be getting 2 warnings: one for using Ward that is # deprecated, one for using the copy argument assert_equal(len(warning_list), 2) with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", UserWarning) # Use the copy argument, to raise a warning ward_tree(X, copy=True) # We should be getting 1 warnings: for using the copy argument assert_equal(len(warning_list), 1) # Let's test a hiearchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])
def test_linkage_misc(): # Misc tests on linkage rnd = np.random.RandomState(42) X = rnd.normal(size=(5, 5)) assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X) assert_raises(ValueError, linkage_tree, X, linkage='foo') assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4))) # Smoke test FeatureAgglomeration FeatureAgglomeration().fit(X) # Deprecation of Ward class with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", DeprecationWarning) Ward().fit(X) assert_equal(len(warning_list), 1) # test hiearchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0]) # test hiearchical clustering on a precomputed distances matrix res = linkage_tree(X, affinity=manhattan_distances) assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_linkage_misc(): # Misc tests on linkage rng = np.random.RandomState(42) X = rng.normal(size=(5, 5)) with pytest.raises(ValueError): AgglomerativeClustering(linkage='foo').fit(X) with pytest.raises(ValueError): linkage_tree(X, linkage='foo') with pytest.raises(ValueError): linkage_tree(X, connectivity=np.ones((4, 4))) # Smoke test FeatureAgglomeration FeatureAgglomeration().fit(X) # test hierarchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0]) # test hierarchical clustering on a precomputed distances matrix res = linkage_tree(X, affinity=manhattan_distances) assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_linkage_misc(): # Misc tests on linkage rng = np.random.RandomState(42) X = rng.normal(size=(5, 5)) assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X) assert_raises(ValueError, linkage_tree, X, linkage='foo') assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4))) # Smoke test FeatureAgglomeration FeatureAgglomeration().fit(X) # test hiearchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0]) # test hiearchical clustering on a precomputed distances matrix res = linkage_tree(X, affinity=manhattan_distances) assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_deprecation_of_n_components_in_linkage_tree(): rng = np.random.RandomState(0) X = rng.randn(50, 100) # Test for warning of deprecation of n_components in linkage_tree children, n_nodes, n_leaves, parent = assert_warns(DeprecationWarning, linkage_tree, X.T, n_components=10) children_t, n_nodes_t, n_leaves_t, parent_t = linkage_tree(X.T) assert_array_equal(children, children_t) assert_equal(n_nodes, n_nodes_t) assert_equal(n_leaves, n_leaves_t) assert_equal(parent, parent_t)
def test_ward_linkage_tree_return_distance(): """Test return_distance option on linkage and ward trees""" # test that return_distance when set true, gives same # output on both structured and unstructured clustering. n, p = 10, 5 rng = np.random.RandomState(0) connectivity = np.ones((n, n)) for i in range(5): X = .1 * rng.normal(size=(n, p)) X -= 4. * np.arange(n)[:, np.newaxis] X -= X.mean(axis=1)[:, np.newaxis] out_unstructured = ward_tree(X, return_distance=True) out_structured = ward_tree(X, connectivity=connectivity, return_distance=True) # get children children_unstructured = out_unstructured[0] children_structured = out_structured[0] # check if we got the same clusters assert_array_equal(children_unstructured, children_structured) # check if the distances are the same dist_unstructured = out_unstructured[-1] dist_structured = out_structured[-1] assert_array_almost_equal(dist_unstructured, dist_structured) for linkage in ['average', 'complete']: structured_items = linkage_tree( X, connectivity=connectivity, linkage=linkage, return_distance=True)[-1] unstructured_items = linkage_tree( X, linkage=linkage, return_distance=True)[-1] structured_dist = structured_items[-1] unstructured_dist = unstructured_items[-1] structured_children = structured_items[0] unstructured_children = unstructured_items[0] assert_array_almost_equal(structured_dist, unstructured_dist) assert_array_almost_equal( structured_children, unstructured_children) # test on the following dataset where we know the truth # taken from scipy/cluster/tests/hierarchy_test_data.py X = np.array([[1.43054825, -7.5693489], [6.95887839, 6.82293382], [2.87137846, -9.68248579], [7.87974764, -6.05485803], [8.24018364, -6.09495602], [7.39020262, 8.54004355]]) # truth linkage_X_ward = np.array([[3., 4., 0.36265956, 2.], [1., 5., 1.77045373, 2.], [0., 2., 2.55760419, 2.], [6., 8., 9.10208346, 4.], [7., 9., 24.7784379, 6.]]) linkage_X_complete = np.array( [[3., 4., 0.36265956, 2.], [1., 5., 1.77045373, 2.], [0., 2., 2.55760419, 2.], [6., 8., 6.96742194, 4.], [7., 9., 18.77445997, 6.]]) linkage_X_average = np.array( [[3., 4., 0.36265956, 2.], [1., 5., 1.77045373, 2.], [0., 2., 2.55760419, 2.], [6., 8., 6.55832839, 4.], [7., 9., 15.44089605, 6.]]) n_samples, n_features = np.shape(X) connectivity_X = np.ones((n_samples, n_samples)) out_X_unstructured = ward_tree(X, return_distance=True) out_X_structured = ward_tree(X, connectivity=connectivity_X, return_distance=True) # check that the labels are the same assert_array_equal(linkage_X_ward[:, :2], out_X_unstructured[0]) assert_array_equal(linkage_X_ward[:, :2], out_X_structured[0]) # check that the distances are correct assert_array_almost_equal(linkage_X_ward[:, 2], out_X_unstructured[4]) assert_array_almost_equal(linkage_X_ward[:, 2], out_X_structured[4]) linkage_options = ['complete', 'average'] X_linkage_truth = [linkage_X_complete, linkage_X_average] for (linkage, X_truth) in zip(linkage_options, X_linkage_truth): out_X_unstructured = linkage_tree( X, return_distance=True, linkage=linkage) out_X_structured = linkage_tree( X, connectivity=connectivity_X, linkage=linkage, return_distance=True) # check that the labels are the same assert_array_equal(X_truth[:, :2], out_X_unstructured[0]) assert_array_equal(X_truth[:, :2], out_X_structured[0]) # check that the distances are correct assert_array_almost_equal(X_truth[:, 2], out_X_unstructured[4]) assert_array_almost_equal(X_truth[:, 2], out_X_structured[4])
labels = data.loc[:, 'food_group_desc'] print(X.shape) # In[54]: # Import library to do agglomerative clustering from sklearn.cluster import AgglomerativeClustering # Group foods into two clusters based on nutrients (the default number of clusters) nut_cluster = AgglomerativeClustering(n_clusters=1, affinity='cosine', linkage='complete').fit(X.T) children, n_components, n_leaves, parents, nut_distance = linkage_tree( X.T, n_clusters=1, affinity='cosine', linkage='complete', return_distance=True) food_cluster = AgglomerativeClustering(n_clusters=1, ).fit(X) children, n_components, n_leaves, parents, food_distance = linkage_tree( X, n_clusters=1, return_distance=True) # In[55]: plt.title('Nutrient Cluster Dendrogram') plt.rcParams["figure.figsize"] = [10, 12] plot_dendrogram(nut_cluster, distance=nut_distance, labels=[nut[4:] for nut in nutrients], orientation='right',
# deprecated, one for using the copy argument assert_equal(len(warning_list), 2) ======= from sklearn.utils.testing import assert_warns >>>>>>> remote with warnings.catch_warnings(record=True) as warning_list: warnings.simplefilter("always", UserWarning) # Use the copy argument, to raise a warning ward_tree(X, copy=True) # We should be getting 1 warnings: for using the copy argument assert_equal(len(warning_list), 1) # Let's test a hiearchical clustering on a precomputed distances matrix dis = cosine_distances(X) res = linkage_tree(dis, affinity="precomputed") assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0]) def test_structured_linkage_tree(): """ Check that we obtain the correct solution for structured linkage trees. """ rnd = np.random.RandomState(0) mask = np.ones([10, 10], dtype=np.bool) # Avoiding a mask with only 'True' entries mask[4:7, 4:7] = 0 X = rnd.randn(50, 100) connectivity = grid_to_graph(*mask.shape) for tree_builder in _TREE_BUILDERS.values(): children, n_components, n_leaves, parent = \