Ejemplo n.º 1
0
    def test_binary_partition_tree_average_linkage2(self):
        """Average linkage on a hand-built 10-vertex graph.

        Builds an undirected graph with 21 float64-weighted edges, runs
        ``hg.binary_partition_tree_average_linkage`` without explicit edge
        weight weights, and compares the resulting parent array (exactly)
        and node altitudes (up to ``np.allclose`` tolerance) with
        hard-coded reference values.
        """
        # Edge list given as two parallel tuples: sources[i] -- targets[i].
        sources = (0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 7,
                   7)
        targets = (3, 6, 4, 2, 5, 3, 6, 9, 7, 3, 8, 5, 9, 4, 6, 9, 7, 8, 6, 9,
                   8)
        weights = np.asarray(
            (0.87580029, 0.60123697, 0.79924759, 0.74221387, 0.75418382,
             0.66159356, 1.31856839, 0.76080612, 1.08881471, 0.98557615,
             0.61454158, 0.50913424, 0.63556478, 0.64684775, 1.14865302,
             0.81741018, 2.1591071, 0.60563004, 2.06636665, 1.35617725,
             0.83085949),
            dtype=np.float64)

        g = hg.UndirectedGraph(10)
        g.add_edges(sources, targets)

        result_tree, result_altitudes = \
            hg.binary_partition_tree_average_linkage(g, weights)

        # Reference hierarchy: 10 leaves followed by 9 internal nodes.
        ref_parents = np.asarray(
            (11, 14, 10, 13, 15, 10, 11, 18, 12, 13,
             12, 17, 16, 14, 15, 16, 17, 18, 18),
            dtype=np.int64)
        ref_altitudes = np.asarray(
            (0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.509134, 0.601237,
             0.610086, 0.635565, 0.661594, 0.732129, 0.810695, 1.241727,
             1.35874),
            dtype=np.float64)

        self.assertTrue(np.all(ref_parents == result_tree.parents()))
        self.assertTrue(np.allclose(ref_altitudes, result_altitudes))
Ejemplo n.º 2
0
    def test_binary_partition_tree_exponential_linkage_equiv(self):
        """Exponential linkage must match its three limiting linkages.

        On a 10x10 4-adjacency graph with seeded random edge weights and
        random integer edge weight weights, the exponential linkage is
        compared with:

        * average linkage (called with the weight weights) for alpha = 0,
        * complete linkage for alpha = +inf,
        * single linkage for alpha = -inf.

        Parent arrays must be identical and altitudes must agree up to
        ``np.allclose`` tolerance.
        """
        np.random.seed(10)

        g = hg.get_4_adjacency_graph((10, 10))
        edge_weights = np.random.rand(g.num_edges())
        edge_weight_weights = np.random.randint(1, 10, g.num_edges())

        # (alpha, reference linkage, extra positional args for the reference)
        cases = (
            (0, hg.binary_partition_tree_average_linkage,
             (edge_weight_weights,)),
            (float('inf'), hg.binary_partition_tree_complete_linkage, ()),
            (float('-inf'), hg.binary_partition_tree_single_linkage, ()),
        )

        for alpha, reference_linkage, extra_args in cases:
            tree, altitudes = hg.binary_partition_tree_exponential_linkage(
                g, edge_weights, alpha, edge_weight_weights)
            t_ref, alt_ref = reference_linkage(g, edge_weights, *extra_args)
            self.assertTrue(np.all(tree.parents() == t_ref.parents()))
            self.assertTrue(np.allclose(altitudes, alt_ref))
Ejemplo n.º 3
0
def binary_partition_tree_exponential_linkage(graph,
                                              edge_weights,
                                              alpha,
                                              edge_weight_weights=None):
    r"""
    Binary partition tree with exponential linkage distance.

    Given a graph :math:`G=(V, E)`, with initial edge weights :math:`w` with associated weights :math:`w_2`,
    the distance :math:`d(X,Y)` between any two clusters :math:`X` and :math:`Y` is

    .. math::

         d(X,Y) = \frac{1}{Z} \sum_{x \in X, y \in Y, \{x,y\} \in E} w_2(\{x,y\}) \times \exp(\alpha * w(\{x,y\})) \times w(\{x,y\})

    with :math:`Z = \sum_{x \in X, y \in Y, \{x,y\} \in E} w_2(\{x,y\}) \times \exp(\alpha * w(\{x,y\}))`.

    Three values of :math:`\alpha` are dispatched to dedicated linkage functions instead of the
    generic implementation:

    - :math:`\alpha = 0`: average linkage (called with :attr:`edge_weight_weights`);
    - :math:`\alpha = -\infty`: single linkage (:attr:`edge_weight_weights` is not passed on);
    - :math:`\alpha = +\infty`: complete linkage (:attr:`edge_weight_weights` is not passed on).

    :See:

         Nishant Yadav, Ari Kobren, Nicholas Monath, Andrew Mccallum.
         `Supervised Hierarchical Clustering with Exponential Linkage <http://proceedings.mlr.press/v97/yadav19a.html>`_
         Proceedings of the 36th International Conference on Machine Learning, PMLR 97:6973-6983, 2019.

    :param graph: input graph
    :param edge_weights: edge weights of the input graph
    :param alpha: exponential parameter (converted with ``float``)
    :param edge_weight_weights: weighting of edge weights of the input graph (default to an array of ones)
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """

    alpha = float(alpha)

    if edge_weight_weights is None:
        edge_weight_weights = np.ones_like(edge_weights)
    else:
        # both arrays are handed together to the linkage implementations:
        # bring them to a common dtype first
        edge_weights, edge_weight_weights = hg.cast_to_common_type(
            edge_weights, edge_weight_weights)

    # special cases: improve efficiency and avoid numerical issues
    if alpha == 0:
        tree, altitudes = hg.binary_partition_tree_average_linkage(
            graph, edge_weights, edge_weight_weights)
    elif alpha == float('-inf'):
        tree, altitudes = hg.binary_partition_tree_single_linkage(
            graph, edge_weights)
    elif alpha == float('inf'):
        tree, altitudes = hg.binary_partition_tree_complete_linkage(
            graph, edge_weights)
    else:
        res = hg.cpp._binary_partition_tree_exponential_linkage(
            graph, edge_weights, alpha, edge_weight_weights)
        tree = res.tree()
        altitudes = res.altitudes()

    hg.CptHierarchy.link(tree, graph)

    return tree, altitudes
Ejemplo n.º 4
0
def sparse_avg_hac(coo_pw_sim_mat):
    """Run average-linkage HAC on a sparse COO matrix of edges.

    The matrix is converted with ``coo_2_hg`` into a higra graph and its
    edge weights, clustered with
    ``hg.binary_partition_tree_average_linkage``, and the resulting
    hierarchy is exported as a scipy-style linkage matrix.

    :param coo_pw_sim_mat: N by N coo matrix w/ pairwise sim matrix
    :return: tuple ``(Z, tree, altitudes, graph, edge_weights)`` where ``Z``
        is the linkage matrix, as in scipy linkage, and the remaining items
        are the intermediate higra objects
    """
    graph, weights = coo_2_hg(coo_pw_sim_mat)
    tree, node_altitudes = hg.binary_partition_tree_average_linkage(
        graph, weights)
    linkage_matrix = hg.binary_hierarchy_to_scipy_linkage_matrix(
        tree, altitudes=node_altitudes)
    return linkage_matrix, tree, node_altitudes, graph, weights
Ejemplo n.º 5
0
    def test_binary_partition_tree_average_linkage(self):
        """Weighted average linkage on a 3x3 4-adjacency grid graph.

        Runs ``hg.binary_partition_tree_average_linkage`` with float32 edge
        values and float32 edge weights, then checks the parent array and
        the node levels for exact equality with hard-coded references.
        """
        grid = hg.get_4_adjacency_graph((3, 3))

        # One value and one weight for each of the 12 values supplied below.
        values = np.asarray(
            (1, 7, 2, 10, 16, 3, 11, 4, 12, 14, 5, 6), np.float32)
        weights = np.asarray(
            (7, 1, 7, 3, 2, 8, 2, 2, 2, 1, 5, 9), np.float32)

        result_tree, result_levels = hg.binary_partition_tree_average_linkage(
            grid, values, weights)

        ref_parents = np.asarray(
            (9, 9, 10, 11, 11, 12, 13, 13, 14, 10, 15, 12, 15, 14, 16, 16, 16),
            np.uint32)
        ref_levels = np.asarray(
            (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 11.5, 12),
            np.float32)

        self.assertTrue(np.all(ref_parents == result_tree.parents()))
        # Exact comparison, as in the reference test (no tolerance applied).
        self.assertTrue(np.all(ref_levels == result_levels))