Example #1
0
    def test_accumulate_graph_edges(self):
        g = hg.get_4_adjacency_graph((2, 3))

        edge_weights = np.asarray((1, 2, 3, 4, 6, 5, 7))

        res1 = hg.accumulate_graph_edges(g, edge_weights, hg.Accumulators.max)
        ref1 = (2, 4, 6, 5, 7, 7)
        self.assertTrue(np.all(res1 == ref1))

        edge_weights2 = np.asarray(
            ((1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 9)))
        res2 = hg.accumulate_graph_edges(g, edge_weights2, hg.Accumulators.sum)
        ref2 = np.asarray(
            ((3, 11), (8, 13), (8, 6), (8, 6), (17, 13), (12, 11)))
        self.assertTrue(np.all(res2 == ref2))
Example #2
0
def attribute_contour_strength(tree, edge_weights, vertex_perimeter=None, edge_length=None, leaf_graph=None):
    """
    Strength of the contour of each node of the given tree. The strength of the contour of a node is defined as the
    mean edge weights on the contour.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param edge_weights: edge_weights of the leaf graph
    :param vertex_perimeter: perimeter of each vertex of the leaf graph (provided by :func:`~higra.attribute_vertex_perimeter` on `leaf_graph`)
    :param edge_length: length of each edge of the leaf graph (provided by :func:`~higra.attribute_edge_length` on `leaf_graph`)
    :param leaf_graph: (deduced from :class:`~higra.CptHierarchy`)
    :return: a 1d array
    """

    if vertex_perimeter is None:
        vertex_perimeter = hg.attribute_vertex_perimeter(leaf_graph)

    if edge_length is None:
        edge_length = hg.attribute_edge_length(leaf_graph)

    perimeter = attribute_contour_length(tree, vertex_perimeter, edge_length, leaf_graph)

    # perimeter of the root may be null
    if np.isclose(perimeter[-1], 0):
        perimeter[-1] = 1

    if hg.CptRegionAdjacencyGraph.validate(leaf_graph):
        edge_weights = hg.rag_accumulate_on_edges(leaf_graph, hg.Accumulators.sum, edge_weights)

    vertex_weights_sum = hg.accumulate_graph_edges(leaf_graph, edge_weights, hg.Accumulators.sum)
    edge_weights_sum = attribute_contour_length(tree, vertex_weights_sum, edge_weights, leaf_graph)

    return edge_weights_sum / perimeter
Example #3
0
def attribute_vertex_perimeter(graph, edge_length=None):
    """
    Vertex perimeter of the given graph.
    The perimeter of a vertex is defined as the sum of the length of out-edges of the vertex.

    If the input graph has an attribute value `no_border_vertex_out_degree`, then each vertex perimeter is assumed to be
    equal to this attribute value. This is a convenient method to handle image type graphs where an outer border has to be
    considered.

    :param graph: input graph
    :param edge_length: length of the edges of the input graph (provided by :func:`~higra.attribute_edge_length` on `graph`)
    :return: a nd array
    """
    if edge_length is None:
        edge_length = hg.attribute_edge_length(graph)

    special_case_border_graph = hg.get_attribute(
        graph, "no_border_vertex_out_degree")

    if special_case_border_graph is not None:
        res = np.full((graph.num_vertices(), ),
                      special_case_border_graph,
                      dtype=np.float64)
        res = hg.delinearize_vertex_weights(res, graph)
        return res

    res = hg.accumulate_graph_edges(graph, edge_length, hg.Accumulators.sum)
    res = hg.delinearize_vertex_weights(res, graph)
    return res
Example #4
0
def attribute_tree_sampling_probability(tree, leaf_graph, leaf_graph_edge_weights, model='edge'):
    """
    Given a tree :math:`T`, estimate the probability that a node :math:`n` of the tree represents the smallest cluster
    containing a pair of vertices :math:`\{a, b\}` of the graph :math:`G=(V, E)`
    with edge weights :math:`w`.

    This method is defined in [1]_.

    We define the probability :math:`P(\{a,b\})` of a pair of vertices :math:`\{a,b\}` as :math:`w(\{a,b\}) / Z`
    with :math:`Z=\sum_{e\in E}w(E)` if :math:`\{a,b\}` is an edge of :math:`G` and 0 otherwise.
    Then the probability :math:`P(a)` of a vertex :math:`b` is defined as :math:`\sum_{b\in V}P(\{a, b\})`

    Two sampling strategies are proposed for sampling pairs of vertices to compute the probability of a node of the tree:

    - *edge*: the probability of sampling the pair :math:`\{a, b\}` is given by :math:`P(\{a, b\})`; and
    - *null*: the probability of sampling the pair :math:`\{a, b\}` is given by the product of the probabilities
      of :math:`a` and :math:`b`: :math:`P(a)*P(b)`.

    Assuming that the edge weights on the leaf graph of a hierarchy represents similarities:

    .. epigraph::

        *We expect these distributions to differ significantly if the tree indeed represents the hierarchical structure of the graph.
        Specifically, we expect [the edge distribution] to be mostly concentrated on deep nodes of the tree
        (far from the root), as two nodes* :math:`u`, :math:`v` *connected with high weight* :math:`w(\{u, v\})` *in the graph
        typically  belong to a small cluster, representative of the clustering structure of the graph; on the contrary,
        we expect [the null distribution] to be concentrated over shallow nodes (close to the root) as two nodes*
        :math:`w(\{u, v\})` *sampled independently at random typically belong to large clusters, less representative of the
        clustering structure of the graph*. [1]_


    .. [1] Charpentier, B. & Bonald, T. (2019).  `"Tree Sampling Divergence: An Information-Theoretic Metric for \
           Hierarchical Graph Clustering." <https://hal.telecom-paristech.fr/hal-02144394/document>`_ Proceedings of IJCAI.

    :Complexity:

    The tree sampling divergence runtime complexity depends of the sampling model:

     - *edge*: :math:`\mathcal{O}(N\log(N) + M)` with :math:`N` the number of  nodes in the tree and :math:`M` the number of edges in the leaf graph.
     - *null*: :math:`\mathcal{O}(N\\times C^2)` with :math:`N` the number of nodes in the tree  and :math:`C` the maximal number of children of a node in the tree.

    :see:

    The :func:`~higra.tree_sampling_divergence` is a non supervised hierarchical cost function defined as the
    Kullback-Leibler divergence between the edge sampling model and the independent (null) sampling model.

    :param tree: Input tree
    :param leaf_graph: Graph defined on the leaves of the input tree
    :param leaf_graph_edge_weights: Edge weights of the leaf graphs (similarities)
    :param model: defines the edge sampling strategy, either "edge" or "null"
    :return: a 1d array
    """
    if model not in ("edge", "null"):
        raise ValueError("Parameter 'model' must be either 'edge' or 'null'.")

    if model == 'edge':
        lca_map = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
        leaf_graph_edge_weights = leaf_graph_edge_weights / np.sum(leaf_graph_edge_weights)
        return hg.accumulate_at(lca_map, leaf_graph_edge_weights, hg.Accumulators.sum)
    else:  # model = 'null'
        leaf_graph_vertex_weights = hg.accumulate_graph_edges(leaf_graph, leaf_graph_edge_weights, hg.Accumulators.sum)
        leaf_graph_vertex_weights = leaf_graph_vertex_weights / np.sum(leaf_graph_edge_weights)
        tree_node_weights = hg.accumulate_sequential(tree, leaf_graph_vertex_weights, hg.Accumulators.sum)
        return hg.attribute_children_pair_sum_product(tree, tree_node_weights)