Example #1
    def test_attribute_tree_sampling_probability_edge_model(self):
        # 4-adjacency graph on a 3x3 grid: 12 edges
        g = hg.get_4_adjacency_graph((3, 3))
        edge_weights = np.asarray((0, 6, 2, 6, 0, 0, 5, 4, 5, 3, 2, 2))
        tree, altitudes = hg.quasi_flat_zone_hierarchy(g, edge_weights)
        res = hg.attribute_tree_sampling_probability(tree,
                                                     g,
                                                     edge_weights,
                                                     model='edge')

        # Under the edge model, the probability of a node x is the total weight
        # of the edges whose two extremities have x as least common ancestor,
        # normalized by the total edge weight (here 31). Leaves always get 0;
        # nodes 9-11 get 0 because they are created by zero-weight edges.
        ref = np.asarray((0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 3,
                          26)) / np.sum(edge_weights)
        self.assertTrue(np.allclose(ref, res))
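
For quick experimentation, the same call can be run outside the unittest harness. A minimal standalone sketch, reusing the graph and weights from the test above:

import numpy as np
import higra as hg

g = hg.get_4_adjacency_graph((3, 3))
edge_weights = np.asarray((0, 6, 2, 6, 0, 0, 5, 4, 5, 3, 2, 2))
tree, altitudes = hg.quasi_flat_zone_hierarchy(g, edge_weights)

# probability, for each tree node, that an edge sampled proportionally to its
# weight has its two extremities meeting exactly at that node
p = hg.attribute_tree_sampling_probability(tree, g, edge_weights, model='edge')
print(p)  # leaves are 0; the root gets 26/31, the weight of inter-zone edges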
Example #2
import numpy as np
import higra as hg


def tree_sampling_divergence(tree, edge_weights, leaf_graph):
    """
    Tree sampling divergence is an unsupervised measure of the quality of a hierarchical clustering of an
    edge weighted graph.
    It measures how well the given edge weighted graph can be reconstructed from the tree alone.
    It is equal to 0 if and only if the given graph can be fully recovered from the tree.

    It is defined as the Kullback-Leibler divergence between the edge sampling model :math:`p` and the independent
    (null) sampling model :math:`q` of the nodes of a tree (see :func:`~higra.attribute_tree_sampling_probability`).

    The tree sampling divergence on a tree :math:`T` is then

    .. math::

        TSD(T) = \sum_{x \in T} p(x) \log \frac{p(x)}{q(x)}

    The definition of the tree sampling divergence was proposed in:

        Charpentier, B. & Bonald, T. (2019).  `"Tree Sampling Divergence: An Information-Theoretic Metric for \
        Hierarchical Graph Clustering." <https://hal.telecom-paristech.fr/hal-02144394/document>`_ Proceedings of IJCAI.

    :Complexity:

    The tree sampling divergence is computed in :math:`\mathcal{O}(N (\log(N) + C^2) + M)` with :math:`N` the number of
    nodes in the tree, :math:`M` the number of edges in the leaf graph, and :math:`C` the maximal number of children of
    a node in the tree.

    :param tree: Input tree
    :param edge_weights: Edge weights on the leaf graph (similarities)
    :param leaf_graph: Leaf graph of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :return: a real number
    """

    num_l = tree.num_leaves()
    # keep only the internal nodes: leaves have probability 0 in both models
    p = hg.attribute_tree_sampling_probability(tree, leaf_graph, edge_weights,
                                               'edge')[num_l:]
    q = hg.attribute_tree_sampling_probability(tree, leaf_graph, edge_weights,
                                               'null')[num_l:]
    # nodes with p(x) = 0 contribute nothing to the Kullback-Leibler sum
    index, = np.where(p)
    return np.sum(p[index] * np.log(p[index] / q[index]))
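
A short usage sketch of the function above: assuming the tree carries the CptHierarchy concept (true for trees built by higra constructions such as quasi_flat_zone_hierarchy), leaf_graph is deduced automatically; the data reuses Example #1.

import numpy as np
import higra as hg

g = hg.get_4_adjacency_graph((3, 3))
edge_weights = np.asarray((0, 6, 2, 6, 0, 0, 5, 4, 5, 3, 2, 2))
tree, altitudes = hg.quasi_flat_zone_hierarchy(g, edge_weights)

# leaf_graph is deduced from the CptHierarchy concept attached to tree
tsd = hg.tree_sampling_divergence(tree, edge_weights)
print(tsd)  # 0 if and only if the graph is fully recoverable from the tree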
Example #3
    def test_attribute_tree_sampling_probability_null_model(self):
        g = hg.get_4_adjacency_graph((3, 3))
        edge_weights = np.asarray((0, 6, 2, 6, 0, 0, 5, 4, 5, 3, 2, 2))
        tree, altitudes = hg.quasi_flat_zone_hierarchy(g, edge_weights)
        res = hg.attribute_tree_sampling_probability(tree,
                                                     g,
                                                     edge_weights,
                                                     model='null')

        # Under the null model, the probability of a node x is
        # sum(d(u) * d(v)) / Z^2 over the leaf pairs (u, v) whose least common
        # ancestor is x, with d the weighted vertex degrees (6, 8, 2, 11, 15,
        # 7, 7, 9, 5) and Z the total edge weight.
        Z = np.sum(edge_weights)
        ref = np.asarray(
            (0, 0, 0, 0, 0, 0, 0, 0, 0,      # leaves
             6 * 8,                          # node 9  = {0, 1}
             2 * 7,                          # node 10 = {2, 5}
             11 * 15,                        # node 11 = {3, 4}
             6 * 2 + 6 * 7 + 8 * 2 + 8 * 7,  # node 12 = {0, 1, 2, 5}
             7 * 9 + 7 * 5 + 9 * 5,          # node 13 = {6, 7, 8}
             # node 14 = {0, 1, 2, 5, 6, 7, 8}
             6 * 7 + 6 * 9 + 6 * 5 + 8 * 7 + 8 * 9 + 8 * 5 + 2 * 7 + 2 * 9 + 2 * 5 + 7 * 7 + 7 * 9 + 7 * 5,
             # node 15 = root
             6 * 11 + 6 * 15 + 8 * 11 + 8 * 15 + 2 * 11 + 2 * 15 + 11 * 7 + 11 * 7 + 11 * 9 + 11 * 5 + 15 * 7 + 15 * 7 + 15 * 9 + 15 * 5)) / \
              (Z * Z)
        self.assertTrue(np.allclose(ref, res))
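
The reference values above can also be reproduced programmatically. A sketch, assuming g.edge_list() returns the (sources, targets) arrays of the graph; the naive ancestor-walking LCA is only meant for this 16-node example:

import numpy as np
import higra as hg

g = hg.get_4_adjacency_graph((3, 3))
edge_weights = np.asarray((0, 6, 2, 6, 0, 0, 5, 4, 5, 3, 2, 2))
tree, altitudes = hg.quasi_flat_zone_hierarchy(g, edge_weights)

# weighted vertex degrees d(u): each edge weight counts for both extremities
sources, targets = g.edge_list()
deg = np.zeros(tree.num_leaves())
np.add.at(deg, sources, edge_weights)
np.add.at(deg, targets, edge_weights)

def lca(tree, u, v):
    # naive LCA: collect the ancestors of u, then climb from v until one is hit
    ancestors = set()
    while True:
        ancestors.add(u)
        if u == tree.root():
            break
        u = tree.parent(u)
    while v not in ancestors:
        v = tree.parent(v)
    return v

# null model: q(x) = sum of d(u) * d(v) / Z^2 over leaf pairs with lca(u, v) = x
Z = edge_weights.sum()
q = np.zeros(tree.num_vertices())
n = tree.num_leaves()
for u in range(n):
    for v in range(u + 1, n):
        q[lca(tree, u, v)] += deg[u] * deg[v] / (Z * Z)

print(np.allclose(q, hg.attribute_tree_sampling_probability(
    tree, g, edge_weights, model='null')))  # expected: True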