def test_affinity_passed_to_fix_connectivity():
    # Test that the affinity parameter is actually passed to the pairwise
    # function

    size = 2
    rng = np.random.RandomState(0)
    X = rng.randn(size, size)
    mask = np.array([True, False, False, True])

    connectivity = grid_to_graph(n_x=size, n_y=size,
                                 mask=mask, return_as=np.ndarray)

    class FakeAffinity:
        def __init__(self):
            self.counter = 0

        def increment(self, *args, **kwargs):
            self.counter += 1
            return self.counter

    fa = FakeAffinity()

    linkage_tree(X, connectivity=connectivity, affinity=fa.increment)

    assert fa.counter == 3
Beispiel #2
0
def test_affinity_passed_to_fix_connectivity():
    # Test that the affinity parameter is actually passed to the pairwise
    # function

    size = 2
    rng = np.random.RandomState(0)
    X = rng.randn(size, size)
    mask = np.array([True, False, False, True])

    connectivity = grid_to_graph(n_x=size,
                                 n_y=size,
                                 mask=mask,
                                 return_as=np.ndarray)

    class FakeAffinity:
        def __init__(self):
            self.counter = 0

        def increment(self, *args, **kwargs):
            self.counter += 1
            return self.counter

    fa = FakeAffinity()

    linkage_tree(X, connectivity=connectivity, affinity=fa.increment)

    assert_equal(fa.counter, 3)
def test_linkage_misc():
    # Misc tests on linkage
    X = np.ones((5, 5))
    assert_raises(ValueError,
                  AgglomerativeClustering(linkage='foobar').fit,
                  X)
    assert_raises(ValueError, linkage_tree, X, linkage='foobar')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        # Use the copy argument, to raise a warning
        Ward(copy=True).fit(X)
    # We should be getting 2 warnings: one for using Ward that is
    # deprecated, one for using the copy argument
    assert_equal(len(warning_list), 2)

    # test hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hiearchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Beispiel #4
0
def test_linkage_misc():
    # Misc tests on linkage
    X = np.ones((5, 5))
    assert_raises(ValueError,
                  AgglomerativeClustering(linkage='foobar').fit,
                  X)
    assert_raises(ValueError, linkage_tree, X, linkage='foobar')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        # Use the copy argument, to raise a warning
        Ward(copy=True).fit(X)
    # We should be getting 2 warnings: one for using Ward that is
    # deprecated, one for using the copy argument
    assert_equal(len(warning_list), 2)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        # Use the copy argument, to raise a warning
        ward_tree(X, copy=True)
    # We should be getting 1 warnings: for using the copy argument
    assert_equal(len(warning_list), 1)

    # Let's test a hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])
def test_linkage_misc():
    # Misc tests on linkage
    rnd = np.random.RandomState(42)
    X = rnd.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Deprecation of Ward class
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        Ward().fit(X)
    assert_equal(len(warning_list), 1)

    # test hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hiearchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_linkage_misc():
    # Misc tests on linkage
    rnd = np.random.RandomState(42)
    X = rnd.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Deprecation of Ward class
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        Ward().fit(X)
    assert_equal(len(warning_list), 1)

    # test hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hiearchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Beispiel #7
0
def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    with pytest.raises(ValueError):
        AgglomerativeClustering(linkage='foo').fit(X)

    with pytest.raises(ValueError):
        linkage_tree(X, linkage='foo')

    with pytest.raises(ValueError):
        linkage_tree(X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)

    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)

    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hiearchical clustering on a precomputed distances matrix
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_deprecation_of_n_components_in_linkage_tree():
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)
    # Test for warning of deprecation of n_components in linkage_tree
    children, n_nodes, n_leaves, parent = assert_warns(DeprecationWarning,
                                                       linkage_tree,
                                                       X.T,
                                                       n_components=10)
    children_t, n_nodes_t, n_leaves_t, parent_t = linkage_tree(X.T)
    assert_array_equal(children, children_t)
    assert_equal(n_nodes, n_nodes_t)
    assert_equal(n_leaves, n_leaves_t)
    assert_equal(parent, parent_t)
def test_deprecation_of_n_components_in_linkage_tree():
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)
    # Test for warning of deprecation of n_components in linkage_tree
    children, n_nodes, n_leaves, parent = assert_warns(DeprecationWarning,
                                                       linkage_tree,
                                                       X.T,
                                                       n_components=10)
    children_t, n_nodes_t, n_leaves_t, parent_t = linkage_tree(X.T)
    assert_array_equal(children, children_t)
    assert_equal(n_nodes, n_nodes_t)
    assert_equal(n_leaves, n_leaves_t)
    assert_equal(parent, parent_t)
Beispiel #11
0
def test_ward_linkage_tree_return_distance():
    """Test return_distance option on linkage and ward trees"""

    # test that return_distance when set true, gives same
    # output on both structured and unstructured clustering.
    n, p = 10, 5
    rng = np.random.RandomState(0)

    connectivity = np.ones((n, n))
    for i in range(5):
        X = .1 * rng.normal(size=(n, p))
        X -= 4. * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]

        out_unstructured = ward_tree(X, return_distance=True)
        out_structured = ward_tree(X, connectivity=connectivity,
                                   return_distance=True)

        # get children
        children_unstructured = out_unstructured[0]
        children_structured = out_structured[0]

        # check if we got the same clusters
        assert_array_equal(children_unstructured, children_structured)

        # check if the distances are the same
        dist_unstructured = out_unstructured[-1]
        dist_structured = out_structured[-1]

        assert_array_almost_equal(dist_unstructured, dist_structured)

        for linkage in ['average', 'complete']:
            structured_items = linkage_tree(
                X, connectivity=connectivity, linkage=linkage,
                return_distance=True)[-1]
            unstructured_items = linkage_tree(
                X, linkage=linkage, return_distance=True)[-1]
            structured_dist = structured_items[-1]
            unstructured_dist = unstructured_items[-1]
            structured_children = structured_items[0]
            unstructured_children = unstructured_items[0]
            assert_array_almost_equal(structured_dist, unstructured_dist)
            assert_array_almost_equal(
                structured_children, unstructured_children)

    # test on the following dataset where we know the truth
    # taken from scipy/cluster/tests/hierarchy_test_data.py
    X = np.array([[1.43054825, -7.5693489],
                  [6.95887839, 6.82293382],
                  [2.87137846, -9.68248579],
                  [7.87974764, -6.05485803],
                  [8.24018364, -6.09495602],
                  [7.39020262, 8.54004355]])
    # truth
    linkage_X_ward = np.array([[3., 4., 0.36265956, 2.],
                               [1., 5., 1.77045373, 2.],
                               [0., 2., 2.55760419, 2.],
                               [6., 8., 9.10208346, 4.],
                               [7., 9., 24.7784379, 6.]])

    linkage_X_complete = np.array(
        [[3., 4., 0.36265956, 2.],
         [1., 5., 1.77045373, 2.],
         [0., 2., 2.55760419, 2.],
         [6., 8., 6.96742194, 4.],
         [7., 9., 18.77445997, 6.]])

    linkage_X_average = np.array(
        [[3., 4., 0.36265956, 2.],
         [1., 5., 1.77045373, 2.],
         [0., 2., 2.55760419, 2.],
         [6., 8., 6.55832839, 4.],
         [7., 9., 15.44089605, 6.]])

    n_samples, n_features = np.shape(X)
    connectivity_X = np.ones((n_samples, n_samples))

    out_X_unstructured = ward_tree(X, return_distance=True)
    out_X_structured = ward_tree(X, connectivity=connectivity_X,
                                 return_distance=True)

    # check that the labels are the same
    assert_array_equal(linkage_X_ward[:, :2], out_X_unstructured[0])
    assert_array_equal(linkage_X_ward[:, :2], out_X_structured[0])

    # check that the distances are correct
    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_unstructured[4])
    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_structured[4])

    linkage_options = ['complete', 'average']
    X_linkage_truth = [linkage_X_complete, linkage_X_average]
    for (linkage, X_truth) in zip(linkage_options, X_linkage_truth):
        out_X_unstructured = linkage_tree(
            X, return_distance=True, linkage=linkage)
        out_X_structured = linkage_tree(
            X, connectivity=connectivity_X, linkage=linkage,
            return_distance=True)

        # check that the labels are the same
        assert_array_equal(X_truth[:, :2], out_X_unstructured[0])
        assert_array_equal(X_truth[:, :2], out_X_structured[0])

        # check that the distances are correct
        assert_array_almost_equal(X_truth[:, 2], out_X_unstructured[4])
        assert_array_almost_equal(X_truth[:, 2], out_X_structured[4])
labels = data.loc[:, 'food_group_desc']

print(X.shape)

# In[54]:

# Import library to do agglomerative clustering
from sklearn.cluster import AgglomerativeClustering

# Group foods into two clusters based on nutrients (the default number of clusters)
nut_cluster = AgglomerativeClustering(n_clusters=1,
                                      affinity='cosine',
                                      linkage='complete').fit(X.T)
children, n_components, n_leaves, parents, nut_distance = linkage_tree(
    X.T,
    n_clusters=1,
    affinity='cosine',
    linkage='complete',
    return_distance=True)

food_cluster = AgglomerativeClustering(n_clusters=1, ).fit(X)
children, n_components, n_leaves, parents, food_distance = linkage_tree(
    X, n_clusters=1, return_distance=True)

# In[55]:

plt.title('Nutrient Cluster Dendrogram')
plt.rcParams["figure.figsize"] = [10, 12]
plot_dendrogram(nut_cluster,
                distance=nut_distance,
                labels=[nut[4:] for nut in nutrients],
                orientation='right',
Beispiel #13
0
    # deprecated, one for using the copy argument
    assert_equal(len(warning_list), 2)
=======
from sklearn.utils.testing import assert_warns
>>>>>>> remote

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        # Use the copy argument, to raise a warning
        ward_tree(X, copy=True)
    # We should be getting 1 warnings: for using the copy argument
    assert_equal(len(warning_list), 1)

    # Let's test a hiearchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])


def test_structured_linkage_tree():
    """
    Check that we obtain the correct solution for structured linkage trees.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    for tree_builder in _TREE_BUILDERS.values():
        children, n_components, n_leaves, parent = \