import higra as hg
import numpy as np
import torch as tc


def subdominant_ultrametric(graph, edge_weights, return_hierarchy=False, dtype=tc.float64):
    """
    Subdominant (single linkage) ultrametric of an edge weighted graph.

    :param graph: input graph (class ``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (pytorch tensor, autograd is supported)
    :param return_hierarchy: if ``True``, the dendrogram representing the hierarchy is also returned as a tuple ``(tree, altitudes)``
    :param dtype: data type of the altitudes of the returned hierarchy (default ``torch.float64``)
    :return: the subdominant ultrametric of the input edge weighted graph (pytorch tensor), and the hierarchy if ``return_hierarchy`` is ``True``
    """
    # compute the single linkage hierarchy of the graph
    tree, altitudes_ = hg.bpt_canonical(graph, edge_weights.detach().numpy())
    # lowest common ancestor of every edge of the graph
    lca_map = hg.attribute_lca_map(tree)

    # the following is used to map lca node indices to their corresponding edge indices in the input graph
    # associated minimum spanning tree
    mst = hg.get_attribute(tree, "mst")
    # map mst edges to graph edges
    mst_map = hg.get_attribute(mst, "mst_edge_map")
    # bijection between single linkage internal nodes and mst edges
    mst_idx = lca_map - tree.num_leaves()
    # mst edge indices in the input graph
    edge_idx = mst_map[mst_idx]

    # altitudes of the internal nodes, read from the differentiable edge weights
    altitudes = edge_weights[mst_map]
    # sanity check:
    # assert np.all(altitudes.detach().numpy() == altitudes_[tree.num_leaves():])
    ultrametric = edge_weights[edge_idx]

    if return_hierarchy:
        return ultrametric, (tree, tc.cat((tc.zeros(tree.num_leaves(), dtype=dtype), altitudes)))
    else:
        return ultrametric
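
# Illustrative usage sketch (not part of the original module): fitting the
# subdominant ultrametric of a tiny 4-adjacency grid and back-propagating
# through it. The graph shape and edge weights below are made-up toy values.
def _example_subdominant_ultrametric():
    g = hg.get_4_adjacency_graph((2, 2))  # 4 vertices, 4 edges
    w = tc.tensor([1.0, 2.0, 3.0, 0.5], dtype=tc.float64, requires_grad=True)
    u, (tree, altitudes) = subdominant_ultrametric(g, w, return_hierarchy=True)
    u.sum().backward()  # gradients flow back to the input edge weights
    return u, tree, altitudes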
def constrained_connectivity_hierarchy_strong_connection(graph, edge_weights):
    """
    Strongly constrained connectivity hierarchy based on the given edge weighted graph.

    Let :math:`X` be a set of vertices, the range of :math:`X` is the maximal weight of the edges linking two vertices inside :math:`X`.

    Let :math:`\\alpha` be a positive real number, a set of vertices :math:`X` is :math:`\\alpha`-connected if, for any two vertices
    :math:`i` and :math:`j` in :math:`X`, there exists a path from :math:`i` to :math:`j` in :math:`X` composed of edges of weights
    lower than or equal to :math:`\\alpha`.

    Let :math:`\\alpha` be a positive real number, the :math:`\\alpha`-strongly connected components of the graph are the maximal
    :math:`\\alpha'`-connected sets of vertices with a range lower than or equal to :math:`\\alpha`, with :math:`\\alpha'\\leq\\alpha`.

    Finally, the strongly constrained connectivity hierarchy is defined as the hierarchy composed of all the
    :math:`\\alpha`-strongly connected components for all positive :math:`\\alpha`.

    The definition used follows the one given in:

        P. Soille,
        "Constrained connectivity for hierarchical image partitioning and simplification,"
        in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 30, no. 7, pp. 1132-1145, July 2008.
        doi: 10.1109/TPAMI.2007.70817

    The algorithm runs in time :math:`\\mathcal{O}(n\\log(n))` and proceeds by filtering a quasi-flat zone hierarchy
    (see :func:`~higra.quasi_flat_zone_hierarchy`).

    :param graph: input graph
    :param edge_weights: edge weights of the input graph
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """
    tree, altitudes = hg.quasi_flat_zone_hierarchy(graph, edge_weights)
    altitude_parents = altitudes[tree.parents()]

    # max edge weight inside each region
    lca_map = hg.attribute_lca_map(tree)
    max_edge_weights = np.zeros((tree.num_vertices(),), dtype=edge_weights.dtype)
    np.maximum.at(max_edge_weights, lca_map, edge_weights)
    max_edge_weights = hg.accumulate_and_max_sequential(tree,
                                                        max_edge_weights,
                                                        max_edge_weights[:tree.num_leaves()],
                                                        hg.Accumulators.max)

    # the root has no parent: make sure it can never be deleted
    altitude_parents[tree.root()] = max(altitudes[tree.root()], max_edge_weights[tree.root()])

    # nodes with a range greater than or equal to the altitude of their parent have to be deleted
    violated_constraints = max_edge_weights >= altitude_parents

    # the altitude of a node with a range greater than its altitude but lower than the altitude
    # of its parent must be raised to its range
    reparable_node_indices = np.nonzero(
        np.logical_and(max_edge_weights > altitudes, max_edge_weights < altitude_parents))
    altitudes[reparable_node_indices] = max_edge_weights[reparable_node_indices]

    # final result construction
    tree, node_map = hg.simplify_tree(tree, violated_constraints)
    altitudes = altitudes[node_map]
    hg.CptHierarchy.link(tree, graph)

    return tree, altitudes
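
# Illustrative usage sketch (toy values, not library code): building the
# strongly constrained connectivity hierarchy of a small weighted grid graph.
def _example_strong_connection_hierarchy():
    g = hg.get_4_adjacency_graph((2, 3))  # 6 vertices, 7 edges
    w = np.asarray([1, 2, 5, 1, 3, 1, 2], dtype=np.float64)
    tree, altitudes = constrained_connectivity_hierarchy_strong_connection(g, w)
    return tree, altitudes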
def loss_dasgupta(graph, edge_weights, ultrametric, hierarchy, sigmoid_param=5, mode='dissimilarity'):
    """
    Relaxation of the cost function defined in:

        S. Dasgupta, "A cost function for similarity-based hierarchical clustering", 2016.

    :param graph: input graph (``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (``torch.Tensor``, autograd is supported)
    :param ultrametric: ultrametric on the input graph (``torch.Tensor``, autograd is supported)
    :param hierarchy: a tuple ``(tree, altitudes)`` corresponding to the result of ``higra.bpt_canonical`` on the input edge weighted graph
    :param sigmoid_param: scale parameter of the sigmoid used in the soft cluster size (area) relaxation
    :param mode: either ``'similarity'`` or ``'dissimilarity'``, depending on the meaning of the edge weights
    :return: loss value as a pytorch scalar
    """
    # The following import requires a valid C++14 compiler.
    # On Windows, you should probably run
    # c:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvars64.bat
    # beforehand to properly set up all environment variables.
    from .softarea import SoftareaFunction

    # hierarchy: nodes are sorted by altitude (from the leaves to the root)
    tree, altitudes = hierarchy

    # soft area: differentiable relaxation of the cluster sizes
    area = SoftareaFunction.apply(ultrametric, graph, hierarchy, sigmoid_param)

    # lowest common ancestor of every edge of the graph
    lca = hg.attribute_lca_map(tree)

    # cost function
    if mode == 'similarity':
        loss = area[lca] * edge_weights
    elif mode == 'dissimilarity':
        loss = area[lca] / edge_weights
    else:
        raise ValueError("'mode' can only be 'similarity' or 'dissimilarity'")

    return loss.mean()
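
# Illustrative training sketch (hypothetical; assumes the softarea C++
# extension compiles on this machine): minimizing the relaxed Dasgupta cost
# of the subdominant ultrametric of a toy graph.
def _example_loss_dasgupta():
    g = hg.get_4_adjacency_graph((2, 2))
    w = tc.tensor([1.0, 2.0, 3.0, 0.5], dtype=tc.float64, requires_grad=True)
    u, hierarchy = subdominant_ultrametric(g, w, return_hierarchy=True)
    loss = loss_dasgupta(g, w, u, hierarchy, sigmoid_param=5, mode='dissimilarity')
    loss.backward()  # gradients w.r.t. the edge weights
    return loss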
def saliency(tree, altitudes, leaf_graph, handle_rag=True):
    """
    The saliency map of the input hierarchy :math:`(tree, altitudes)` for the leaf graph :math:`g` is an array of
    edge weights :math:`sm` for :math:`g` such that, for each pair of adjacent vertices :math:`(i,j)` in :math:`g`,
    :math:`sm(i,j)` is equal to the ultrametric distance between :math:`i` and :math:`j` corresponding to the hierarchy.

    Formally, this is computed using the following property:
    :math:`sm(i,j) = altitudes(lowest\\_common\\_ancestor_{tree}(i,j))`.

    Complexity: :math:`\\mathcal{O}(n\\log(n) + m)` with :math:`n` the number of vertices in the tree and :math:`m`
    the number of edges in the graph.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param altitudes: altitudes of the vertices of the tree
    :param leaf_graph: graph whose vertex set is equal to the set of leaves of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :param handle_rag: if ``True`` and the tree has been constructed on a region adjacency graph, the saliency values are propagated to the original graph, hence leading to a saliency on the original graph and not on the rag
    :return: a 1d array of edge weights
    """
    lca_map = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
    sm = altitudes[lca_map]
    if hg.CptRegionAdjacencyGraph.validate(leaf_graph) and handle_rag:
        sm = hg.rag_back_project_edge_weights(leaf_graph, sm)
    return sm
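
# Illustrative usage sketch (toy values): computing the saliency map of a
# single linkage hierarchy. For single linkage, the saliency map coincides
# with the subdominant ultrametric of the graph.
def _example_saliency():
    g = hg.get_4_adjacency_graph((2, 3))
    w = np.asarray([1, 2, 5, 1, 3, 1, 2], dtype=np.float64)
    tree, altitudes = hg.bpt_canonical(g, w)
    sm = saliency(tree, altitudes, g)  # one value per edge of g
    return sm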
def loss_cluster_size(graph, edge_weights, ultrametric, hierarchy, top_nodes=0, dtype=tc.float64):
    """
    Cluster size regularization:

    .. math::

        loss = \\frac{1}{|E|}\\sum_{e_{xy}\\in E}\\frac{ultrametric(e_{xy})}{\\min\\{|c|\\, | \\, c\\in Children(lca(x,y))\\}}

    :param graph: input graph (``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (``torch.Tensor``, autograd is supported)
    :param ultrametric: ultrametric on the input graph (``torch.Tensor``, autograd is supported)
    :param hierarchy: a tuple ``(tree, altitudes)`` corresponding to the result of ``higra.bpt_canonical`` on the input edge weighted graph
    :param top_nodes: if different from 0, only the top ``top_nodes`` nodes of the hierarchy are taken into account in the cluster size regularization
    :param dtype: data type of the cluster sizes (default ``torch.float64``)
    :return: loss value as a pytorch scalar
    """
    tree, altitudes = hierarchy
    lca_map = hg.attribute_lca_map(tree)

    # restrict the loss to the edges whose lca is among the 'top_nodes' highest nodes of the tree
    if top_nodes <= 0:
        top_nodes = tree.num_vertices()
    top_nodes = max(tree.num_vertices() - top_nodes, tree.num_leaves())
    top_edges, = np.nonzero(lca_map >= top_nodes)

    # size of the smallest child of the lca of each selected edge
    area = hg.attribute_area(tree)
    min_area = hg.accumulate_parallel(tree, area, hg.Accumulators.min)
    min_area = min_area[lca_map[top_edges]]
    min_area = tc.tensor(min_area, dtype=dtype)

    cluster_size_loss = ultrametric[top_edges] / min_area
    return cluster_size_loss.mean()
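
# Illustrative sketch (toy values, not library code): penalizing small
# clusters in the subdominant ultrametric of a tiny graph.
def _example_loss_cluster_size():
    g = hg.get_4_adjacency_graph((2, 2))
    w = tc.tensor([1.0, 2.0, 3.0, 0.5], dtype=tc.float64, requires_grad=True)
    u, hierarchy = subdominant_ultrametric(g, w, return_hierarchy=True)
    loss = loss_cluster_size(g, w, u, hierarchy, top_nodes=3)
    loss.backward()
    return loss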
def attribute_tree_sampling_probability(tree, leaf_graph, leaf_graph_edge_weights, model='edge'):
    """
    Given a tree :math:`T`, estimate the probability that a node :math:`n` of the tree represents the smallest
    cluster containing a pair of vertices :math:`\\{a, b\\}` of the graph :math:`G=(V, E)` with edge weights :math:`w`.

    This method is defined in [1]_.

    We define the probability :math:`P(\\{a,b\\})` of a pair of vertices :math:`\\{a,b\\}` as :math:`w(\\{a,b\\}) / Z`
    with :math:`Z=\\sum_{e\\in E}w(e)` if :math:`\\{a,b\\}` is an edge of :math:`G`, and 0 otherwise.
    Then, the probability :math:`P(a)` of a vertex :math:`a` is defined as :math:`\\sum_{b\\in V}P(\\{a, b\\})`.

    Two sampling strategies are proposed for sampling pairs of vertices to compute the probability of a node of the tree:

    - *edge*: the probability of sampling the pair :math:`\\{a, b\\}` is given by :math:`P(\\{a, b\\})`; and
    - *null*: the probability of sampling the pair :math:`\\{a, b\\}` is given by the product of the probabilities
      of :math:`a` and :math:`b`: :math:`P(a)*P(b)`.

    Assuming that the edge weights on the leaf graph of a hierarchy represent similarities:

    .. epigraph::

        *We expect these distributions to differ significantly if the tree indeed represents the hierarchical structure
        of the graph. Specifically, we expect [the edge distribution] to be mostly concentrated on deep nodes of the
        tree (far from the root), as two nodes* :math:`u`, :math:`v` *connected with high weight* :math:`w(\\{u, v\\})`
        *in the graph typically belong to a small cluster, representative of the clustering structure of the graph; on
        the contrary, we expect [the null distribution] to be concentrated over shallow nodes (close to the root) as
        two nodes* :math:`u`, :math:`v` *sampled independently at random typically belong to large clusters, less
        representative of the clustering structure of the graph*. [1]_

    .. [1] Charpentier, B. & Bonald, T. (2019). `"Tree Sampling Divergence: An Information-Theoretic Metric for \
           Hierarchical Graph Clustering." <https://hal.telecom-paristech.fr/hal-02144394/document>`_ Proceedings of IJCAI.

    :Complexity: The runtime complexity depends on the sampling model:

        - *edge*: :math:`\\mathcal{O}(N\\log(N) + M)` with :math:`N` the number of nodes in the tree and :math:`M` the number of edges in the leaf graph.
        - *null*: :math:`\\mathcal{O}(N\\times C^2)` with :math:`N` the number of nodes in the tree and :math:`C` the maximal number of children of a node in the tree.

    :see: The :func:`~higra.tree_sampling_divergence` is an unsupervised hierarchical cost function defined as the
        Kullback-Leibler divergence between the edge sampling model and the independent (null) sampling model.
    :param tree: input tree
    :param leaf_graph: graph defined on the leaves of the input tree
    :param leaf_graph_edge_weights: edge weights of the leaf graph (similarities)
    :param model: defines the pair sampling strategy, either "edge" or "null"
    :return: a 1d array
    """
    if model not in ("edge", "null"):
        raise ValueError("Parameter 'model' must be either 'edge' or 'null'.")

    if model == 'edge':
        # probability of a node: sum of the normalized weights of the edges
        # whose lowest common ancestor is that node
        lca_map = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
        leaf_graph_edge_weights = leaf_graph_edge_weights / np.sum(leaf_graph_edge_weights)
        return hg.accumulate_at(lca_map, leaf_graph_edge_weights, hg.Accumulators.sum)
    else:  # model == 'null'
        # vertex probability: sum of the weights of the incident edges, normalized by the total edge weight
        leaf_graph_vertex_weights = hg.accumulate_graph_edges(leaf_graph, leaf_graph_edge_weights, hg.Accumulators.sum)
        leaf_graph_vertex_weights = leaf_graph_vertex_weights / np.sum(leaf_graph_edge_weights)
        tree_node_weights = hg.accumulate_sequential(tree, leaf_graph_vertex_weights, hg.Accumulators.sum)
        return hg.attribute_children_pair_sum_product(tree, tree_node_weights)
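
# Illustrative usage sketch (toy similarity weights): comparing the edge and
# null sampling distributions over the nodes of a single linkage tree built
# from the corresponding dissimilarities.
def _example_tree_sampling_probability():
    g = hg.get_4_adjacency_graph((2, 3))
    similarities = np.asarray([4.0, 1.0, 2.0, 5.0, 1.0, 3.0, 2.0])
    tree, _ = hg.bpt_canonical(g, 1.0 / similarities)  # hierarchy from dissimilarities
    p_edge = attribute_tree_sampling_probability(tree, g, similarities, model='edge')
    p_null = attribute_tree_sampling_probability(tree, g, similarities, model='null')
    return p_edge, p_null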
def test_lca_map(self):
    tree, altitudes = TestAttributes.get_test_tree()

    ref_attribute = [9, 16, 14, 16, 10, 11, 16, 16, 16, 15, 12, 13]
    attribute = hg.attribute_lca_map(tree)
    self.assertTrue(np.allclose(ref_attribute, attribute))