def subdominant_ultrametric(graph, edge_weights, return_hierarchy=False, dtype=tc.float64):
    """
    Subdominant (single linkage) ultrametric of an edge weighted graph.

    The single linkage hierarchy is built by ``higra.bpt_canonical`` from a detached copy of the weights.
    The ultrametric values are then gathered by indexing ``edge_weights`` itself, so the result stays
    connected to the input tensor.

    :param graph: input graph (class ``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (pytorch tensor, autograd is supported)
    :param return_hierarchy: if ``True``,  the dendrogram representing the hierarchy is also returned as a tuple ``(tree, altitudes)``
    :param dtype: dtype of the zero altitudes given to the leaves of the returned hierarchy (only used if ``return_hierarchy`` is ``True``)
    :return: the subdominant ultrametric of the input edge weighted graph (pytorch tensor) (and the hierarchy if ``return_hierarchy`` is ``True``)
    """
    # higra consumes numpy arrays: give it a detached host copy of the weights
    # (``.cpu()`` leaves a tensor that already lives on the CPU unchanged)
    tree, _ = hg.bpt_canonical(graph, edge_weights.detach().cpu().numpy())

    # lowest common ancestors of every edge of the graph
    lca_map = hg.attribute_lca_map(tree)
    # minimum spanning tree associated to the hierarchy
    mst = hg.get_attribute(tree, "mst")
    # map mst edges to graph edges
    mst_map = hg.get_attribute(mst, "mst_edge_map")
    # bijection between single linkage (internal) nodes and mst edges
    mst_idx = lca_map - tree.num_leaves()
    # for every graph edge, index (in the input graph) of the mst edge attached to its lca
    edge_idx = mst_map[mst_idx]

    ultrametric = edge_weights[edge_idx]

    if not return_hierarchy:
        return ultrametric

    # altitudes of the internal nodes, read from the input tensor
    altitudes = edge_weights[mst_map]
    # leaves get altitude 0; the zeros are created on the device of the altitudes so that they can be concatenated
    leaf_altitudes = tc.zeros(tree.num_leaves(), dtype=dtype, device=altitudes.device)
    return ultrametric, (tree, tc.cat((leaf_altitudes, altitudes)))
Ejemplo n.º 2
0
def constrained_connectivity_hierarchy_strong_connection(graph, edge_weights):
    r"""
    Strongly constrained connectivity hierarchy of an edge weighted graph.

    The range of a set of vertices :math:`X` is the maximal weight of the edges linking two vertices of :math:`X`.

    Given a positive real number :math:`\alpha`, a set of vertices :math:`X` is :math:`\alpha`-connected if any two
    vertices :math:`i` and :math:`j` of :math:`X` are linked by a path in :math:`X` whose edges all have a weight
    lower than or equal to :math:`\alpha`.

    The :math:`\alpha`-strongly connected components of the graph are the maximal :math:`\alpha'`-connected sets
    of vertices whose range is lower than or equal to :math:`\alpha`, with :math:`\alpha'\leq\alpha`.

    The strongly constrained connectivity hierarchy is the hierarchy made of all the
    :math:`\alpha`-strongly connected components for all positive :math:`\alpha`.

    The definition used follows the one given in:

        P. Soille,
        "Constrained connectivity for hierarchical image partitioning and simplification,"
        in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 30, no. 7, pp. 1132-1145, July 2008.
        doi: 10.1109/TPAMI.2007.70817

    The algorithm runs in time :math:`\mathcal{O}(n\log(n))` and proceeds by filtering a quasi-flat zone
    hierarchy (see :func:`~higra.quasi_flat_zone_hierarchy`).

    :param graph: input graph
    :param edge_weights: edge weights of the input graph
    :return: a tree (Concept :class:`~higra.CptHierarchy`) and its node altitudes
    """
    tree, altitudes = hg.quasi_flat_zone_hierarchy(graph, edge_weights)
    root = tree.root()
    num_leaves = tree.num_leaves()

    # altitude of the parent of every node, taken before the altitudes are repaired below
    parent_altitudes = altitudes[tree.parents()]

    # range of every node: each edge first contributes to the lowest common ancestor of its extremities,
    # then the maxima are propagated along the tree
    edge_lca = hg.attribute_lca_map(tree)
    node_range = np.zeros((tree.num_vertices(),), dtype=edge_weights.dtype)
    np.maximum.at(node_range, edge_lca, edge_weights)
    node_range = hg.accumulate_and_max_sequential(tree,
                                                  node_range,
                                                  node_range[:num_leaves],
                                                  hg.Accumulators.max)

    # special case of the root: its "parent altitude" is the larger of its own altitude and its range
    parent_altitudes[root] = max(altitudes[root], node_range[root])

    # a node whose range reaches the altitude of its parent is flagged for removal
    nodes_to_remove = node_range >= parent_altitudes

    # a node whose range exceeds its own altitude while staying below the altitude of its parent
    # is kept, and its altitude is raised to its range
    repairable = np.logical_and(node_range > altitudes, node_range < parent_altitudes)
    altitudes[repairable] = node_range[repairable]

    # build the final hierarchy and bring the altitudes along
    tree, node_map = hg.simplify_tree(tree, nodes_to_remove)
    hg.CptHierarchy.link(tree, graph)

    return tree, altitudes[node_map]
Ejemplo n.º 3
0
def loss_dasgupta(graph,
                  edge_weights,
                  ultrametric,
                  hierarchy,
                  sigmoid_param=5,
                  mode='dissimilarity'):
    """
    Relaxation of cost function defined in S. Dasgupta, A cost function for similarity-based hierarchical clustering, 2016.

    For every edge, the soft area of the lowest common ancestor of its extremities is multiplied
    (``mode='similarity'``) or divided (``mode='dissimilarity'``) by the edge weight; the loss is the mean
    of these values over the edges.

    :param graph: input graph (``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (``torch.Tensor``, autograd is supported)
    :param ultrametric: ultrametric on the input graph  (``torch.Tensor``, autograd is supported)
    :param hierarchy: tuple ``(tree, altitudes)`` corresponding to the result of ``higra.bpt_canonical`` on the input edge weighted graph
    :param sigmoid_param: scale parameter used in the relaxation of the cluster size
    :param mode: either ``'similarity'`` or ``'dissimilarity'``, tells how ``edge_weights`` are interpreted
    :return: loss value as a pytorch scalar
    :raises ValueError: if ``mode`` is neither ``'similarity'`` nor ``'dissimilarity'``
    """
    # fail fast on a wrong mode, before any computation is started
    if mode not in ('similarity', 'dissimilarity'):
        raise ValueError("'mode' can only be 'similarity' or 'dissimilarity'")

    # The following line requires that a valid C++14 compiler be installed.
    # On Windows, you should probably run
    #   c:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvars64.bat
    # to properly setup all environment variables
    from .softarea import SoftareaFunction

    # hierarchy: nodes are sorted by altitudes (from leaves to the root)
    tree, _ = hierarchy

    # soft area of every node of the hierarchy
    area = SoftareaFunction.apply(ultrametric, graph, hierarchy, sigmoid_param)

    # soft area of the lowest common ancestor of the extremities of every edge
    edge_area = area[hg.attribute_lca_map(tree)]

    # cost function
    if mode == 'similarity':
        loss = edge_area * edge_weights
    else:  # mode == 'dissimilarity'
        loss = edge_area / edge_weights

    return loss.mean()
Ejemplo n.º 4
0
def saliency(tree, altitudes, leaf_graph, handle_rag=True):
    r"""
    Saliency map of the hierarchy :math:`(tree, altitudes)` on the leaf graph :math:`g`.

    The saliency map is an array of edge weights :math:`sm` for :math:`g` such that, for each pair of adjacent
    vertices :math:`(i,j)` in :math:`g`, :math:`sm(i,j)` is equal to the ultra-metric distance between :math:`i`
    and :math:`j` corresponding to the hierarchy.

    It is computed with the property: :math:`sm(i,j) = altitudes(lowest\_common\_ancestor_{tree}(i,j))`.

    Complexity: :math:`\mathcal{O}(n\log(n) + m)` with :math:`n` the number of vertices in the tree and :math:`m` the number of edges in the graph.

    :param tree: input tree (Concept :class:`~higra.CptHierarchy`)
    :param altitudes: altitudes of the vertices of the tree
    :param leaf_graph: graph whose vertex set is equal to the leaves of the input tree (deduced from :class:`~higra.CptHierarchy`)
    :param handle_rag: if tree has been constructed on a rag, then saliency values will be propagated to the original graph, hence leading to a saliency on the original graph and not on the rag
    :return: 1d array of edge weights
    """
    # altitude of the lowest common ancestor of the two extremities of every edge of the leaf graph
    edge_lca = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
    edge_saliency = altitudes[edge_lca]

    is_rag = hg.CptRegionAdjacencyGraph.validate(leaf_graph)
    if not (is_rag and handle_rag):
        return edge_saliency

    # the leaf graph is a region adjacency graph: project the values back with the rag helper
    return hg.rag_back_project_edge_weights(leaf_graph, edge_saliency)
Ejemplo n.º 5
0
def loss_cluster_size(graph,
                      edge_weights,
                      ultrametric,
                      hierarchy,
                      top_nodes=0,
                      dtype=tc.float64):
    r"""
    Cluster size regularization:

     .. math::

        loss = \frac{1}{|E|}\sum_{e_{xy}\in E}\frac{ultrametric(e_{xy})}{\min\{|c|\, | \, c\in Children(lca(x,y))\}}

    :param graph: input graph (``higra.UndirectedGraph``)
    :param edge_weights: edge weights of the input graph (``torch.Tensor``, autograd is supported)
    :param ultrametric: ultrametric on the input graph  (``torch.Tensor``, autograd is supported)
    :param hierarchy: tuple ``(tree, altitudes)`` corresponding to the result of ``higra.bpt_canonical`` on the input edge weighted graph
    :param top_nodes: if strictly positive, only the edges whose lowest common ancestor is among the last ``top_nodes`` nodes of the hierarchy are taken into account; otherwise all edges are used
    :param dtype: dtype of the tensor holding the minimal children areas
    :return: loss value as a pytorch scalar

    """
    tree, _ = hierarchy
    lca_map = hg.attribute_lca_map(tree)

    # index of the first node taken into account: never below the first internal node
    if top_nodes <= 0:
        top_nodes = tree.num_vertices()
    first_kept_node = max(tree.num_vertices() - top_nodes, tree.num_leaves())
    top_edges, = np.nonzero(lca_map >= first_kept_node)

    # for every selected edge: area of the smallest child of the lca of its extremities
    area = hg.attribute_area(tree)
    min_area = hg.accumulate_parallel(tree, area, hg.Accumulators.min)
    # created on the device of ``ultrametric`` so that the division below is valid wherever that tensor lives
    min_area = tc.tensor(min_area[lca_map[top_edges]], dtype=dtype, device=ultrametric.device)

    cluster_size_loss = ultrametric[top_edges] / min_area

    return cluster_size_loss.mean()
Ejemplo n.º 6
0
def attribute_tree_sampling_probability(tree, leaf_graph, leaf_graph_edge_weights, model='edge'):
    r"""
    Given a tree :math:`T`, estimate the probability that a node :math:`n` of the tree represents the smallest cluster
    containing a pair of vertices :math:`\{a, b\}` of the graph :math:`G=(V, E)`
    with edge weights :math:`w`.

    This method is defined in [1]_.

    The probability :math:`P(\{a,b\})` of a pair of vertices :math:`\{a,b\}` is defined as :math:`w(\{a,b\}) / Z`
    with :math:`Z=\sum_{e\in E}w(e)` if :math:`\{a,b\}` is an edge of :math:`G` and 0 otherwise.
    The probability :math:`P(a)` of a vertex :math:`a` is then defined as :math:`\sum_{b\in V}P(\{a, b\})`.

    Two strategies are proposed for sampling the pairs of vertices used to compute the probability of a node of the tree:

    - *edge*: the probability of sampling the pair :math:`\{a, b\}` is given by :math:`P(\{a, b\})`; and
    - *null*: the probability of sampling the pair :math:`\{a, b\}` is given by the product of the probabilities
      of :math:`a` and :math:`b`: :math:`P(a)*P(b)`.

    Assuming that the edge weights on the leaf graph of a hierarchy represent similarities:

    .. epigraph::

        *We expect these distributions to differ significantly if the tree indeed represents the hierarchical structure of the graph.
        Specifically, we expect [the edge distribution] to be mostly concentrated on deep nodes of the tree
        (far from the root), as two nodes* :math:`u`, :math:`v` *connected with high weight* :math:`w(\{u, v\})` *in the graph
        typically  belong to a small cluster, representative of the clustering structure of the graph; on the contrary,
        we expect [the null distribution] to be concentrated over shallow nodes (close to the root) as two nodes*
        :math:`u`, :math:`v` *sampled independently at random typically belong to large clusters, less representative of the
        clustering structure of the graph*. [1]_


    .. [1] Charpentier, B. & Bonald, T. (2019).
           `"Tree Sampling Divergence: An Information-Theoretic Metric for Hierarchical Graph Clustering."
           <https://hal.telecom-paristech.fr/hal-02144394/document>`_ Proceedings of IJCAI.

    :Complexity:

    The runtime complexity depends on the sampling model:

     - *edge*: :math:`\mathcal{O}(N\log(N) + M)` with :math:`N` the number of  nodes in the tree and :math:`M` the number of edges in the leaf graph.
     - *null*: :math:`\mathcal{O}(N\times C^2)` with :math:`N` the number of nodes in the tree  and :math:`C` the maximal number of children of a node in the tree.

    :see:

    The :func:`~higra.tree_sampling_divergence` is a non supervised hierarchical cost function defined as the
    Kullback-Leibler divergence between the edge sampling model and the independent (null) sampling model.

    :param tree: Input tree
    :param leaf_graph: Graph defined on the leaves of the input tree
    :param leaf_graph_edge_weights: Edge weights of the leaf graphs (similarities)
    :param model: defines the edge sampling strategy, either "edge" or "null"
    :return: a 1d array
    :raises ValueError: if ``model`` is neither "edge" nor "null"
    """
    supported_models = ("edge", "null")
    if model not in supported_models:
        raise ValueError("Parameter 'model' must be either 'edge' or 'null'.")

    if model == 'null':
        # weight of a vertex: sum of the weights of its incident edges, normalized by the total edge weight
        vertex_weights = hg.accumulate_graph_edges(leaf_graph, leaf_graph_edge_weights, hg.Accumulators.sum)
        vertex_probabilities = vertex_weights / np.sum(leaf_graph_edge_weights)
        # accumulate the vertex values along the tree, then combine the values of the children of every node
        node_weights = hg.accumulate_sequential(tree, vertex_probabilities, hg.Accumulators.sum)
        return hg.attribute_children_pair_sum_product(tree, node_weights)

    # model == 'edge': every edge adds its normalized weight to the lowest common ancestor of its extremities
    edge_lca = hg.attribute_lca_map(tree, leaf_graph=leaf_graph)
    edge_probabilities = leaf_graph_edge_weights / np.sum(leaf_graph_edge_weights)
    return hg.accumulate_at(edge_lca, edge_probabilities, hg.Accumulators.sum)
Ejemplo n.º 7
0
    def test_lca_map(self):
        """Compare ``hg.attribute_lca_map`` on the shared test tree with hard-coded reference values."""
        tree, _ = TestAttributes.get_test_tree()

        # expected lowest common ancestor for each entry returned by the lca map
        expected_lca = [9, 16, 14, 16, 10, 11, 16, 16, 16, 15, 12, 13]
        computed_lca = hg.attribute_lca_map(tree)

        matches_reference = np.allclose(expected_lca, computed_lca)
        self.assertTrue(matches_reference)