def test_accumulate_at(self):
    """Check ``hg.accumulate_at`` with the sum accumulator on 1d and 2d weights."""
    target_indices = np.asarray((1, 1, -1, 2, 0), dtype=np.int64)

    # Scalar weights: one value per entry of target_indices. The weight paired
    # with index -1 (value 3) does not show up in any of the expected sums.
    scalar_weights = np.asarray((1, 2, 3, 4, 5))
    scalar_sums = hg.accumulate_at(target_indices, scalar_weights, hg.Accumulators.sum)
    self.assertTrue(np.all(scalar_sums == np.asarray((5, 3, 4))))

    # Vectorial weights: one row per entry of target_indices; the expected
    # result holds one accumulated row per target index.
    vector_weights = np.asarray(((1, 6), (2, 7), (3, 8), (4, 9), (5, 10)))
    vector_sums = hg.accumulate_at(target_indices, vector_weights, hg.Accumulators.sum)
    self.assertTrue(np.all(vector_sums == np.asarray(((5, 10), (3, 13), (4, 9)))))
def rag_accumulate_on_edges(rag, accumulator, edge_weights):
    r"""
    Weights rag edges by accumulating values from the edge weights of the original graph.

    For any edge index :math:`ei` of the rag,
    :math:`result[ei] = accumulator(\{edge\_weights[j] | rag\_edge\_map[j] == ei\})`

    :param rag: input region adjacency graph (Concept :class:`~higra.RegionAdjacencyGraph`)
    :param accumulator: see :class:`~higra.Accumulators`
    :param edge_weights: edge weights on the original graph
    :return: edge weights on the region adjacency graph
    """
    # NOTE: the docstring is a raw string so that the LaTeX backslashes
    # (``\{``, ``\_``) are not parsed as (invalid) string escape sequences.
    detail = hg.CptRegionAdjacencyGraph.construct(rag)

    # detail["edge_map"] is used as the index array: each original edge weight
    # is accumulated into the slot given by its entry in the map.
    return hg.accumulate_at(detail["edge_map"], edge_weights, accumulator)
def rag_accumulate_on_vertices(rag, accumulator, vertex_weights):
    r"""
    Weights rag vertices by accumulating values from the vertex weights of the original graph.

    For any vertex index :math:`i` of the rag,
    :math:`result[i] = accumulator(\{vertex\_weights[j] | rag\_vertex\_map[j] == i\})`

    :param rag: input region adjacency graph (Concept :class:`~higra.RegionAdjacencyGraph`)
    :param accumulator: see :class:`~higra.Accumulators`
    :param vertex_weights: vertex weights on the original graph
    :return: vertex weights on the region adjacency graph
    """
    # NOTE: the docstring is a raw string so that the LaTeX backslashes
    # (``\{``, ``\_``) are not parsed as (invalid) string escape sequences.
    detail = hg.CptRegionAdjacencyGraph.construct(rag)

    # Weights are first linearized with respect to the original graph
    # (detail["pre_graph"]); presumably this yields one entry (or row) per
    # original vertex so they line up with the vertex map -- TODO confirm.
    linear_weights = hg.linearize_vertex_weights(vertex_weights, detail["pre_graph"])

    # detail["vertex_map"] is used as the index array: each original vertex
    # weight is accumulated into the slot given by its entry in the map.
    return hg.accumulate_at(detail["vertex_map"], linear_weights, accumulator)
def attribute_tree_sampling_probability(tree, leaf_graph, leaf_graph_edge_weights, model='edge'):
    r"""
    Given a tree :math:`T`, estimate the probability that a node :math:`n` of the tree represents the smallest
    cluster containing a pair of vertices :math:`\{a, b\}` of the graph :math:`G=(V, E)`
    with edge weights :math:`w`.

    This method is defined in [1]_.

    We define the probability :math:`P(\{a,b\})` of a pair of vertices :math:`\{a,b\}` as :math:`w(\{a,b\}) / Z`
    with :math:`Z=\sum_{e\in E}w(e)` if :math:`\{a,b\}` is an edge of :math:`G` and 0 otherwise.
    Then the probability :math:`P(a)` of a vertex :math:`a` is defined as :math:`\sum_{b\in V}P(\{a, b\})`

    Two sampling strategies are proposed for sampling pairs of vertices to compute the probability of a node of the tree:

    - *edge*: the probability of sampling the pair :math:`\{a, b\}` is given by :math:`P(\{a, b\})`; and
    - *null*: the probability of sampling the pair :math:`\{a, b\}` is given by the product of the probabilities
      of :math:`a` and :math:`b`: :math:`P(a)*P(b)`.

    Assuming that the edge weights on the leaf graph of a hierarchy represents similarities:

    .. epigraph::

        *We expect these distributions to differ significantly if the tree indeed represents the hierarchical
        structure of the graph. Specifically, we expect [the edge distribution] to be mostly concentrated on deep
        nodes of the tree (far from the root), as two nodes* :math:`u`, :math:`v` *connected with high weight*
        :math:`w(\{u, v\})` *in the graph typically belong to a small cluster, representative of the clustering
        structure of the graph; on the contrary, we expect [the null distribution] to be concentrated over
        shallow nodes (close to the root) as two nodes* :math:`u`, :math:`v` *sampled independently at random
        typically belong to large clusters, less representative of the clustering structure of the graph*. [1]_

    .. [1] Charpentier, B. & Bonald, T. (2019). `"Tree Sampling Divergence: An Information-Theoretic Metric for
           Hierarchical Graph Clustering."
           <https://hal.telecom-paristech.fr/hal-02144394/document>`_ Proceedings of IJCAI.

    :Complexity:

    The tree sampling divergence runtime complexity depends on the sampling model:

    - *edge*: :math:`\mathcal{O}(N\log(N) + M)` with :math:`N` the number of nodes in the tree and
      :math:`M` the number of edges in the leaf graph.
    - *null*: :math:`\mathcal{O}(N\times C^2)` with :math:`N` the number of nodes in the tree and
      :math:`C` the maximal number of children of a node in the tree.

    :see:

    The :func:`~higra.tree_sampling_divergence` is a non supervised hierarchical cost function defined as the
    Kullback-Leibler divergence between the edge sampling model and the independent (null) sampling model.

    :param tree: Input tree
    :param leaf_graph: Graph defined on the leaves of the input tree
    :param leaf_graph_edge_weights: Edge weights of the leaf graphs (similarities)
    :param model: defines the edge sampling strategy, either "edge" or "null"
    :return: a 1d array
    :raises ValueError: if ``model`` is neither "edge" nor "null"
    """
    # NOTE: the docstring is a raw string so that the LaTeX backslashes are not
    # parsed as (invalid) string escape sequences.
    if model not in ("edge", "null"):
        raise ValueError("Parameter 'model' must be either 'edge' or 'null'.")

    if model == 'edge':
        # Each edge contributes its normalized weight to the tree node given by
        # the lca map entry of that edge.
        lca_map = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
        edge_probabilities = leaf_graph_edge_weights / np.sum(leaf_graph_edge_weights)
        return hg.accumulate_at(lca_map, edge_probabilities, hg.Accumulators.sum)

    # model == 'null': per-vertex sums of the edge weights, normalized by the
    # total edge weight, are accumulated along the tree before the pairwise
    # children products are computed.
    vertex_weights = hg.accumulate_graph_edges(leaf_graph, leaf_graph_edge_weights, hg.Accumulators.sum)
    vertex_probabilities = vertex_weights / np.sum(leaf_graph_edge_weights)
    tree_node_weights = hg.accumulate_sequential(tree, vertex_probabilities, hg.Accumulators.sum)
    return hg.attribute_children_pair_sum_product(tree, tree_node_weights)