Ejemplo n.º 1
0
def binary_hierarchy_to_scipy_linkage_matrix(tree, altitudes=None, area=None):
    """
    Build the SciPy linkage matrix equivalent to a Higra binary hierarchy.

    Layout of the result, as described in the SciPy documentation:

        An :math:`n-1` by 4 matrix :math:`Z` is returned.
        At the :math:`i`-th iteration, clusters with indices :math:`Z[i, 0]` and :math:`Z[i, 1]` are combined to
        form cluster :math:`n+i`.
        A cluster with an index  less than :math:`n` corresponds to one of the :math:`n` original observations.
        The distance between clusters :math:`Z[i, 0]` and :math:`Z[i, 1]` is given by :math:`Z[i, 2]`.
        The fourth value :math:`Z[i, 3]` represents the number of original observations in the newly formed cluster.

    When :attr:`altitudes` is ``None``, the result of :func:`~higra.attribute_regular_altitudes` on :attr:`tree`
    is used instead.

    When :attr:`area` is ``None``, the result of :func:`~higra.attribute_area` on :attr:`tree` is used instead.
    In both cases the area is cast to 64 bit integers before being handed to the native implementation.

    :param tree: Input tree
    :param altitudes: Tree nodes altitudes (should be increasing w.r.t tree)
    :param area: Tree nodes area (should be increasing w.r.t tree)
    :return: A linkage matrix
    """
    node_altitudes = hg.attribute_regular_altitudes(tree) if altitudes is None else altitudes
    node_area = hg.attribute_area(tree) if area is None else area

    # the native routine receives the area as 64 bit integers
    node_area = hg.cast_to_dtype(node_area, np.int64)

    return hg.cpp._binary_hierarchy_to_scipy_linkage_matrix(tree, node_altitudes, node_area)
Ejemplo n.º 2
0
def print_partition_tree(tree,
                         *,
                         altitudes=None,
                         attribute=None,
                         float_size=4,
                         ordering="area",
                         scale="linear",
                         return_string=False):
    """
    Print a partition tree in ASCII format.

    The tree is represented as a dendrogram oriented horizontally with the leaves on the left and the root on the right.
    Node positions are proportional to their altitudes.

    This function can be used for debugging and illustrations: it is not meant to handle large trees.
    An assertion fails if the tree has 100 leaves or more, or if the drawing would be 1000 columns wide or more.

    :param tree: Input tree
    :param altitudes: Tree node altitudes (will default to :func:`~higra.attribute_regular_altitudes(tree)` if ``None``)
    :param attribute: Optional tree node attributes. If provided, the node attribute value will be printed instead
            of its altitude.
    :param float_size: Number of characters reserved for number printing (values smaller than 3 are replaced by 3).
    :param ordering: determine how the children of a node are ordered. Possible values are

            - 'altitudes': children sorted by increasing altitude
            - 'area' (default): children sorted by increasing area
            - 'none': children kept in the order given by the tree
            - 'leaves': leaves are placed in index order and every inner node is placed at the mean position of
              its children (branches may then cross each other)
    :param scale: scale of the x axis: 'linear' (default) or 'log'
    :param return_string: if ``True``, the string is returned instead of being printed (default ``False``)
    :return: A string if :attr:`return_string` is ``True``, ``None`` otherwise
    :raises ValueError: if :attr:`scale` or :attr:`ordering` is not one of the supported values
    """
    # arbitrary !
    nleaves = tree.num_leaves()
    assert nleaves < 100, "Tree has too many leaves for pretty print!"
    leaf_format = "{:2d}" if nleaves >= 10 else "{:1d}"

    # number printing
    float_size = max(3, int(float_size))
    half_float = max(1, float_size // 2)
    prec = max(1, float_size - 1)
    float_format = "{0:" + str(float_size) + "." + str(prec) + "g}"

    # space between two leaves
    y_spacing = 3

    # normalized altitudes determines parent/child spacing
    if altitudes is None:
        normalized_altitudes = altitudes = hg.attribute_regular_altitudes(tree)
    else:
        # rescale using the smallest altitude among non-leaf nodes; leaves are forced to 0
        min_a = np.min(altitudes[nleaves:])
        normalized_altitudes = (altitudes - min_a) / (np.max(altitudes) - min_a)
        normalized_altitudes[:nleaves] = 0

    if scale == "log":
        normalized_altitudes = np.log(1 + normalized_altitudes) / np.log(2)
    elif scale == "linear":
        pass
    else:
        raise ValueError("Invalid scale parameter '" + scale + "'")

    # attribute is what is printed
    if attribute is None:
        attribute = altitudes

    # the minimum difference of altitudes between a child and its parent will determine the total size of the graph
    diff_altitudes = normalized_altitudes[tree.parents()] - normalized_altitudes
    min_diff_altitudes = np.min(diff_altitudes[np.nonzero(diff_altitudes)])

    # spacing between two successors cannot be less than float_size + 3
    total_width = int((float_size + 3) / min_diff_altitudes + 1)
    total_height = (1 + y_spacing) * nleaves - y_spacing

    # arbitrary !
    assert total_width < 1000, "Tree is to deep for pretty print!"

    # "drawing" area: one character per cell, with a 10 column margin on the right for the labels
    screen = np.full((total_height, total_width + 10), ' ')

    # y positions
    yy = np.zeros((tree.num_vertices(), ))

    # area is necessary to determine how much space must be "reserved" for each child of a node
    area = hg.attribute_area(tree)

    # how the children of a node are sorted
    ordering_modes = {
        'altitudes': lambda cl: sorted(cl, key=lambda c: altitudes[c]),
        'area': lambda cl: sorted(cl, key=lambda c: area[c]),
        'none': lambda cl: cl,
        'leaves': None
    }

    if ordering not in ordering_modes:
        raise ValueError('Invalid ordering mode.')
    ordering = ordering_modes[ordering]

    # special case, note that the branches of the tree might self-intersect...
    if ordering is None:
        # One leaf every (y_spacing + 1) rows, in index order. All rows are shifted up by one
        # further down (``yy -= 1``), so the layout starts at 1: starting at 0 would give the first
        # leaf row -1, which numpy indexing silently maps to the last row of the drawing area.
        yy[:nleaves] = np.arange(0, total_height, y_spacing + 1) + 1
        for n in tree.leaves_to_root_iterator(include_leaves=False):
            yy[n] = np.mean(yy[tree.children(n)])
    else:

        def compute_yy_rec(n, left, right):
            # Place the subtree rooted in n inside the vertical interval [left, right]:
            # each child receives a share of the interval proportional to its area.
            if tree.is_leaf(n):
                yy[n] = (left + right) / 2
            else:
                cl = ordering(tree.children(n))

                r = right - left
                ys = []
                tarea = 0
                narea = area[n]
                for c in cl:
                    y = compute_yy_rec(c, left + r * tarea / narea,
                                       left + r * (tarea + area[c]) / narea)
                    ys.append(y)
                    tarea += area[c]

                yy[n] = np.mean(ys)

            return yy[n]

        compute_yy_rec(tree.root(), 0, total_height)

    # final scaling along x axis
    # because we subtract the minimal non zero value in normalized altitudes,
    # we shift non leaves nodes to separate them from leaves
    xshift = half_float + 1
    x0_util = 0
    x1_util = total_width - xshift
    xr_util = x1_util - x0_util

    xx = np.round(xr_util * normalized_altitudes)
    xx[nleaves:] += xshift

    def write_string(y, x, s):
        # write s horizontally, one character per cell, starting at (y, x)
        for i, c in enumerate(s):
            screen[y, x + i] = c

    def draw_hline(y, x1, x2):
        # horizontal segment on row y, both ends included
        if x1 > x2:
            x1, x2 = x2, x1
        for x in range(x1, x2 + 1):
            screen[y, x] = "-"

    def draw_vline(x, y1, y2):
        # vertical segment on column x, both ends included
        if y1 > y2:
            y1, y2 = y2, y1
        for y in range(y1, y2 + 1):
            screen[y, x] = "|"

    yy -= 1
    xx = xx.astype(np.int32)
    yy = yy.astype(np.int32)

    # branches first: labels are written afterwards so that they overwrite the lines
    for n in tree.leaves_to_root_iterator(include_leaves=False):
        nx = xx[n] + half_float + 1
        ny = yy[n]
        for c in tree.children(n):
            cx = xx[c]
            if not tree.is_leaf(c):
                cx += half_float + 1
            cy = yy[c]
            draw_vline(nx, cy, ny)
            draw_hline(cy, cx, nx)

    for n in tree.leaves():
        s = leaf_format.format(n)
        write_string(yy[n], xx[n], s)

    for n in tree.leaves_to_root_iterator(include_leaves=False):
        s = float_format.format(attribute[n])
        write_string(yy[n], xx[n], s)

    # assemble the non-empty rows; joining the cells directly avoids the bytes round trip
    # through ``ndarray.tostring``, which is deprecated in numpy
    rows = ("".join(row).rstrip() for row in screen)
    r = "\n".join(row for row in rows if row != "")

    if not return_string:
        print(r)
    else:
        return r
Ejemplo n.º 3
0
 def test_regular_altitudes(self):
     # fixed 10-node tree described by its parent array; nodes 0-5 are expected at altitude 0
     tree = hg.Tree((6, 6, 7, 8, 8, 8, 7, 9, 9, 9))
     computed = hg.attribute_regular_altitudes(tree)

     expected = np.asarray((0, 0, 0, 0, 0, 0, 1 / 3, 2 / 3, 2 / 3, 1))
     is_close = np.allclose(expected, computed)
     self.assertTrue(is_close)
Ejemplo n.º 4
0
def random_binary_partition_tree(num_leaves, asymmetry_probability):
    r"""
    Random binary partition tree with a controlled amount of asymmetry/unbalancedness.

    The tree is grown from the root to the leaves.
    At each step, the algorithm randomly selects one of the *growable* leaf nodes of the current tree.
    Two children are added to the selected node; the number of leaf nodes is hence increased by one.
    Then,

      - with probability :math:`1-asymmetry\_probability`, both new children are marked as *growable*
      - with probability :math:`asymmetry\_probability`, only one of the children is marked as *growable*

    The altitudes of the returned hierarchy are obtained with :func:`~higra.attribute_regular_altitudes`:
    *The regular altitudes is comprised between 0 and 1 and is inversely proportional to the depth of a node*.

    A valid minimal connected graph (a tree) is associated to the leaves of the tree.

    :param num_leaves: expected number of leaves in the generated tree (converted with ``int``, must be positive)
    :param asymmetry_probability: real value between 0 and 1. At 0 the tree is perfectly balanced
            (if :attr:`num_leaves` is a power of 2), at 1 it is perfectly unbalanced
    :return: a tree (Concept :class:`~higra.CptBinaryHierarchy`) and its node altitudes
    """
    import random
    import math

    assert (0 <= asymmetry_probability <= 1)
    num_leaves = int(num_leaves)
    assert (num_leaves > 0)

    num_nodes = 2 * num_leaves - 1
    parents = np.zeros((num_nodes,), dtype=np.int64)

    # nodes are plain dicts while the tree is being grown: "parent" holds the index of the parent,
    # "i" the index of the node itself (set once known), "left"/"right" the children of a split node
    root = {}
    growable = [root]
    all_nodes = [root]

    # inner nodes receive decreasing indices, starting from the root which is its own parent
    i = parents.size - 1
    root["parent"] = i

    n = 1
    while n != num_nodes:
        # the node to split is drawn among the first growable nodes; the size of that window
        # grows with asymmetry_probability (at 0 the oldest growable node is always taken)
        ni = random.randint(0, math.floor(asymmetry_probability * (len(growable) - 1)))
        node = growable.pop(ni)

        node["i"] = i
        node["left"] = {"parent": i}
        node["right"] = {"parent": i}
        i -= 1
        all_nodes.append(node["left"])
        all_nodes.append(node["right"])
        n += 2

        # random.random() lies in [0, 1): the strict comparison makes the probability of the
        # asymmetric case exactly asymmetry_probability (never taken at 0, always taken at 1)
        if random.random() < asymmetry_probability:
            if random.random() >= 0.5:
                growable.append(node["right"])
            else:
                growable.append(node["left"])
        else:
            growable.append(node["left"])
            growable.append(node["right"])

    # nodes that were never split are the leaves: they are numbered from 0 in creation order
    k = 0
    for node in all_nodes:
        if "i" not in node:
            node["i"] = k
            k += 1
        parents[node["i"]] = node["parent"]

    tree = hg.Tree(parents)

    altitudes = hg.attribute_regular_altitudes(tree)

    def _get_associated_mst(tree, altitudes):
        """
        Create a valid edge mst for the given tree (returns an edge weighted undirected graph)
        """
        nb = tree.num_leaves()
        # one representative leaf per node, propagated from the leaves with the "first" accumulator
        link_v = np.arange(nb)
        link_v = hg.accumulate_sequential(tree, link_v, hg.Accumulators.first)

        g = hg.UndirectedGraph(nb)
        edge_weights = np.zeros((nb - 1,), np.float32)
        for r in tree.leaves_to_root_iterator(include_leaves=False):
            # one edge per inner node, joining the representatives of its two children
            g.add_edge(link_v[tree.child(0, r)], link_v[tree.child(1, r)])
            edge_weights[r - nb] = altitudes[r]

        return g, edge_weights

    mst, edge_weights = _get_associated_mst(tree, altitudes)
    mst_edge_map = np.arange(mst.num_edges())

    hg.CptHierarchy.link(tree, mst)
    hg.CptMinimumSpanningTree.link(mst, mst, mst_edge_map)
    hg.CptBinaryHierarchy.link(tree, mst_edge_map, mst)

    return tree, altitudes