コード例 #1
0
ファイル: scm.py プロジェクト: emilhaegglund/wgfast
def _collapse_paths_not_found(f, s, other_dict=None):
    to_del = []
    for masked_split, path in f.iteritems():
        if masked_split not in s:
            for edge in path:
                if other_dict:
                    del other_dict[edge.split_bitmask]
                collapse_edge(edge)
            to_del.append(masked_split)
    for k in to_del:
        del f[k]
コード例 #2
0
ファイル: scm.py プロジェクト: nesnidal/DendroPy
def _collapse_paths_not_found(f, s, other_dict=None):
    to_del = []
    for masked_split, path in f.iteritems():
        if masked_split not in s:
            for edge in path:
                if other_dict:
                    del other_dict[edge.split_bitmask]
                collapse_edge(edge)
            to_del.append(masked_split)
    for k in to_del:
        del f[k]
コード例 #3
0
def long_branch_symmdiff(trees_to_compare,
                         edge_len_threshold,
                         copy_trees=False,
                         rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    The result is a square list of lists `m` where `m[i][j] == m[j][i]` is
    `symmetric_difference(tree_i, tree_j)` and the diagonal is 0. Edges whose
    length is None are never collapsed. While being compared, every tree has
    `is_rooted` set to `bool(rooted)`.

    If `copy_trees` is True then the comparison runs on `copy.copy` of each
    tree. If False, the trees passed in will have their short edges collapsed
    and their splits re-encoded on exit; only their original `is_rooted`
    flags are restored.

    NOTE: when fewer than two trees are supplied, a flat list of zeros
    (`[]` or `[0]`) is returned instead of a matrix, and no tree is touched.
    This is kept as-is for backward compatibility.
    """
    if copy_trees:
        # NOTE(review): copy.copy is shallow unless the tree type customizes
        # copying -- confirm the caller's trees are really left untouched.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' %
               (n_trees, '\n'.join([str(i) for i in tree_list])))
    if n_trees < 2:
        return [0] * n_trees

    original_rooting = []
    for tree in tree_list:
        encode_splits(tree)
        # Collect first, collapse afterwards, so the tree is not restructured
        # while its edges are being iterated.
        to_collapse = []
        for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal):
            elen = edge.length
            if elen is not None and elen < edge_len_threshold:
                to_collapse.append(edge)
        for edge in to_collapse:
            collapse_edge(edge)
        original_rooting.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        # Splits are re-encoded after the topology and rooting have changed.
        encode_splits(tree)

    # `range`/`zip` (rather than `xrange`/`izip`) run on Python 2 and 3.
    sd_mat = [[0] * n_trees for _ in range(n_trees)]
    for (i, tree_one), (j, tree_two) in itertools.combinations(
            enumerate(tree_list), 2):
        sd = symmetric_difference(tree_one, tree_two)
        sd_mat[i][j] = sd
        sd_mat[j][i] = sd

    if not copy_trees:
        for was_rooted, tree in zip(original_rooting, tree_list):
            tree.is_rooted = was_rooted
    return sd_mat
コード例 #4
0
def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Returns matrix of the symmetric_differences between trees after all
    internal edges with lengths < `edge_len_threshold` have been collapsed.

    The returned value is a symmetric list of lists: entry `[i][j]` holds
    `symmetric_difference` of the i-th and j-th trees, and diagonal entries
    are 0. Edges with a length of None are left alone. Each tree's
    `is_rooted` is forced to `bool(rooted)` for the comparison.

    If `copy_trees` is True then the trees are copied with `copy.copy` first.
    If False, the caller's trees will have their short edges collapsed and
    their splits re-encoded on exit (their `is_rooted` values are put back).

    NOTE: with fewer than two trees the function returns a flat list of
    zeros (`[]` or `[0]`), not a matrix, and leaves the trees unmodified.
    That behavior is preserved for existing callers.
    """
    if copy_trees:
        # NOTE(review): copy.copy is a shallow copy unless the tree class
        # overrides it -- verify that the originals are not modified.
        tree_list = [copy.copy(i) for i in trees_to_compare]
    else:
        tree_list = list(trees_to_compare)

    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join([str(i) for i in tree_list])))
    if n_trees < 2:
        return [0] * n_trees

    saved_rooting = []
    for tree in tree_list:
        encode_splits(tree)
        # Gather the short edges before collapsing any of them so the edge
        # iterator never walks a tree that is being restructured.
        to_collapse = []
        for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal):
            elen = edge.length
            if elen is not None and elen < edge_len_threshold:
                to_collapse.append(edge)
        for edge in to_collapse:
            collapse_edge(edge)
        saved_rooting.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        # Re-encode now that topology and rooting may have changed.
        encode_splits(tree)

    # Built-in `range`/`zip` replace the Python 2-only `xrange`/`izip`.
    sd_mat = [[0] * n_trees for _ in range(n_trees)]
    for (i, tree_one), (j, tree_two) in itertools.combinations(enumerate(tree_list), 2):
        sd = symmetric_difference(tree_one, tree_two)
        sd_mat[i][j] = sd
        sd_mat[j][i] = sd

    if not copy_trees:
        for was_rooted, tree in zip(saved_rooting, tree_list):
            tree.is_rooted = was_rooted
    return sd_mat
コード例 #5
0
    def deroot(self):
        """Converts a degree-2 node at the root to a degree-3 node.

        Nothing happens (and None is returned) when there is no seed node,
        when the seed node does not have exactly two children, or when
        neither child has at least two children of its own.

        Otherwise the edge leading to one such child is collapsed with
        `dendropy.treemanip.collapse_edge`. Its length is first added to the
        sibling's edge length when the two lengths can be added.
        """
        seed_node = self.seed_node
        if not seed_node:
            return
        child_nodes = seed_node.child_nodes()
        if len(child_nodes) != 2:
            return

        # The child that gets collapsed must itself have >= 2 children; the
        # second child is preferred when both qualify.
        if len(child_nodes[1].child_nodes()) >= 2:
            to_keep, to_del = child_nodes
        elif len(child_nodes[0].child_nodes()) >= 2:
            to_del, to_keep = child_nodes
        else:
            return
        to_del_edge = to_del.edge
        try:
            to_keep.edge.length += to_del_edge.length
        except (TypeError, AttributeError):
            # Best effort: a missing or None branch length cannot be summed,
            # so the kept edge's length is left unchanged. Only these errors
            # are swallowed; a bare `except` would also hide KeyboardInterrupt.
            pass
        # Imported at the point of use, as in the original.
        # NOTE(review): presumably avoids a circular import -- confirm.
        from dendropy.treemanip import collapse_edge
        collapse_edge(to_del_edge)
コード例 #6
0
ファイル: scm.py プロジェクト: emilhaegglund/wgfast
def _edges_by_masked_split(split_edges, leaf_intersection):
    """Group the edges in `split_edges` by their split masked to the common leaves.

    Returns a dict mapping each masked split (`edge.split_bitmask &
    leaf_intersection`) that is neither empty nor equal to
    `leaf_intersection` to the list of edges that share it, sorted by
    decreasing `split_bitmask`.
    """
    paths = {}
    for edge in split_edges.values():
        split = edge.split_bitmask
        masked = split & leaf_intersection
        if masked and masked != leaf_intersection:
            paths.setdefault(masked, []).append((split, edge))
    # Because each of these paths radiates away from the root (none of the paths
    #   cross the root), the split_bitmasks for deeper edges will be supersets
    #   of the split_bitmasks for shallower nodes.  Thus if we reverse sort we
    #   get the edges in the order root->tip
    for path in paths.values():
        # Sort on the bitmask only: comparing whole (split, edge) tuples falls
        # back to comparing edge objects on ties, which Python 3 rejects.
        path.sort(key=lambda pair: pair[0], reverse=True)
        path[:] = [edge for _, edge in path]
    return paths


def add_to_scm(to_modify, to_consume, rooted=False, gordons_supertree=False):
    """Adds the tree `to_consume` to the tree `to_modify` in a strict consensus
    merge operation.  Both trees must have had encode_splits called on them.

    `to_modify` is changed in place and has `encode_splits` called on it
    before returning, except when the trees share exactly two leaves: then
    both trees are collapsed to polytomies, the non-shared children of
    `to_consume` are moved over, and `to_modify.split_edges` is rebuilt by
    hand. `to_consume` is also modified (rerooted, edges collapsed, subtrees
    moved out of it) and should be treated as consumed.

    `rooted` must be false; the rooted form raises NotImplementedError.
    `gordons_supertree` selects the alternative handling of "collisions"
    (paths longer than one edge in both trees for the same masked split).

    Raises ValueError if the trees have fewer than 2 leaves in common, and
    AssertionError if the trees do not share the same `taxon_set` object.
    """
    assert (to_modify.taxon_set is to_consume.taxon_set)
    taxon_set = to_consume.taxon_set
    if rooted:
        raise NotImplementedError("rooted form of add_to_scm not implemented")
    to_mod_root = to_modify.seed_node
    to_mod_split = to_mod_root.edge.split_bitmask

    to_consume_root = to_consume.seed_node
    to_consume_split = to_consume_root.edge.split_bitmask

    leaf_intersection = to_mod_split & to_consume_split
    if _IS_DEBUG_LOGGING:
        _LOG.debug("add_to_scm:\n  %s\n  + %s\n%s" %
                   (str(to_modify), str(to_consume),
                    format_split(leaf_intersection, taxon_set=taxon_set)))

    n_common_leaves = count_bits(leaf_intersection)
    if n_common_leaves < 2:
        _LOG.error('trees must have at least 2 common leaves')
        raise ValueError('trees must have at least 2 common leaves')
    if n_common_leaves == 2:
        # SCM with 2 leaves in common results in a polytomy
        collapse_clade(to_mod_root)
        collapse_clade(to_consume_root)
        leaves_to_steal = [
            c for c in to_consume_root.child_nodes()
            if not (leaf_intersection & c.edge.split_bitmask)
        ]
        for leaf in leaves_to_steal:
            to_mod_root.add_child(leaf)
            to_mod_root.edge.split_bitmask |= leaf.edge.split_bitmask
        to_modify.split_edges = {
            to_mod_root.edge.split_bitmask: to_mod_root.edge
        }
        for child in to_mod_root.child_nodes():
            to_modify.split_edges[child.edge.split_bitmask] = child.edge
        return

    # at least 3 leaves in common
    # Captured before rerooting; this same mapping object is read afterwards
    # and is the one pruned by _collapse_paths_not_found below.
    tmse = to_modify.split_edges

    if not rooted:
        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        reroot_on_lowest_common_index_path(to_modify, leaf_intersection)
        reroot_on_lowest_common_index_path(to_consume, leaf_intersection)

        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        to_mod_root = to_modify.seed_node
        assert (to_mod_root.edge.split_bitmask == to_mod_split)
        to_consume_root = to_consume.seed_node
        assert (to_consume_root.edge.split_bitmask == to_consume_split)

    # Paths (root->tip edge lists) keyed by the split restricted to the
    # common leaf set, for each tree.
    to_mod_relevant_splits = _edges_by_masked_split(tmse, leaf_intersection)
    to_consume_relevant_splits = _edges_by_masked_split(
        to_consume.split_edges, leaf_intersection)
    if _IS_DEBUG_LOGGING:
        to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
        to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

    # first we'll collapse all paths in the common leafset in to_modify that
    #   are not in to_consume
    _collapse_paths_not_found(to_mod_relevant_splits,
                              to_consume_relevant_splits, tmse)
    # Now we'll collapse all paths in the common leafset in to_consume that
    #   are not in to_modify
    _collapse_paths_not_found(to_consume_relevant_splits,
                              to_mod_relevant_splits)

    # first we'll deal with subtrees that are:
    #       - not in the leaf intersection set, and
    #       - attached to "relevant" nodes
    # We simply move these subtrees from the to_consume tree to the appropriate
    #   node in to_modify
    to_steal = [
        i for i in to_consume_root.child_nodes()
        if (i.edge.split_bitmask & leaf_intersection) == 0
    ]
    for child in to_steal:
        to_mod_root.add_child(child)
        to_mod_root.edge.split_bitmask |= child.edge.split_bitmask

    for masked_split, to_consume_path in to_consume_relevant_splits.items():
        to_mod_path = to_mod_relevant_splits.get(masked_split)
        if _IS_DEBUG_LOGGING and to_mod_path is None:
            # Dump the context before the assertion below fires.
            _LOG.debug("%s = mask" %
                       format_split(leaf_intersection, taxon_set=taxon_set))
            _LOG.debug("%s = masked" %
                       format_split(masked_split, taxon_set=taxon_set))
            _LOG.debug("%s = raw" % format_split(
                to_consume_path[-1].split_bitmask, taxon_set=taxon_set))
            for k in to_mod_relevant_splits:
                _LOG.debug("%s in to_mod_relevant_splits" %
                           format_split(k, taxon_set=taxon_set))

        assert to_mod_path is not None
        to_mod_head = to_mod_path[-1].head_node
        to_mod_head_edge = to_mod_head.edge
        to_consume_head = to_consume_path[-1].head_node
        for child in to_consume_head.child_nodes():
            if (child.edge.split_bitmask & leaf_intersection) == 0:
                # child is the root of a subtree that has no children in the leaf_intersection
                to_mod_head.add_child(child)
                to_mod_head_edge.split_bitmask |= child.edge.split_bitmask
        if len(to_consume_path) > 1:
            if len(to_mod_path) > 1:
                # collision
                if gordons_supertree:
                    for edge in to_mod_path[2:]:
                        p = edge.tail_node
                        c = edge.head_node
                        sibs = p.child_nodes()
                        for sib in sibs:
                            _LOG.debug("sib is %s" % (sib.compose_newick()))
                            if sib is not c:
                                if not sib.is_leaf():
                                    collapse_clade(sib)
                                    collapse_edge(sib.edge)
                        collapse_edge(p.edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            _LOG.debug("child is %s" %
                                       (child.compose_newick()))
                            if child is not avoid:
                                mid_node.add_child(child)
                                collapse_clade(child)
                                if not child.is_leaf():
                                    collapse_edge(child.edge)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
                else:
                    for edge in to_mod_path[1:-1]:
                        collapse_edge(edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            if child is not avoid:
                                mid_node.add_child(child)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
            else:
                # we have to move the subtrees from to_consume to to_modify
                to_mod_edge = to_mod_path[0]
                to_mod_tail, to_mod_head = to_mod_edge.tail_node, to_mod_edge.head_node
                deepest_edge_to_move = to_consume_path[0]
                deepest_node_to_move = deepest_edge_to_move.head_node
                tipmost_edge_to_move = to_consume_path[-1]
                tipmost_node_to_move = tipmost_edge_to_move.tail_node
                prev_head = tipmost_edge_to_move.head_node

                # Splice the to_consume path in between to_mod_tail and
                # to_mod_head.
                to_mod_tail.add_child(deepest_node_to_move)
                to_mod_tail.remove_child(to_mod_head)
                tipmost_node_to_move.add_child(to_mod_head)
                tipmost_node_to_move.remove_child(prev_head)
    encode_splits(to_modify)
コード例 #7
0
ファイル: scm.py プロジェクト: nesnidal/DendroPy
def _edges_by_masked_split(split_edges, leaf_intersection):
    """Group the edges in `split_edges` by their split masked to the common leaves.

    The result maps every masked split (`edge.split_bitmask & leaf_intersection`)
    that is non-empty and different from `leaf_intersection` to the list of
    edges sharing that masked split, ordered by decreasing `split_bitmask`.
    """
    paths = {}
    for edge in split_edges.values():
        split = edge.split_bitmask
        masked = split & leaf_intersection
        if masked and masked != leaf_intersection:
            paths.setdefault(masked, []).append((split, edge))
    # Because each of these paths radiates away from the root (none of the paths
    #   cross the root), the split_bitmasks for deeper edges will be supersets
    #   of the split_bitmasks for shallower nodes.  Thus if we reverse sort we
    #   get the edges in the order root->tip
    for path in paths.values():
        # Key on the bitmask alone so that ties never compare edge objects
        # (unorderable on Python 3).
        path.sort(key=lambda pair: pair[0], reverse=True)
        path[:] = [edge for _, edge in path]
    return paths


def add_to_scm(to_modify, to_consume, rooted=False, gordons_supertree=False):
    """Adds the tree `to_consume` to the tree `to_modify` in a strict consensus
    merge operation.  Both trees must have had encode_splits called on them.

    `to_modify` is updated in place; `encode_splits` is called on it before
    returning, except in the two-common-leaves case, where both trees are
    collapsed to polytomies, the unshared children of `to_consume` are moved
    across, and `to_modify.split_edges` is rebuilt directly. `to_consume` is
    modified as well (rerooted, collapsed, subtrees detached) and should not
    be reused as an intact tree.

    `rooted` must be false: the rooted form raises NotImplementedError.
    `gordons_supertree` switches the handling of "collisions", i.e. masked
    splits whose path is longer than one edge in both trees.

    Raises ValueError when fewer than 2 leaves are shared and AssertionError
    when the two trees do not use the same `taxon_set` object.
    """
    assert(to_modify.taxon_set is to_consume.taxon_set)
    taxon_set = to_consume.taxon_set
    if rooted:
        raise NotImplementedError("rooted form of add_to_scm not implemented")
    to_mod_root = to_modify.seed_node
    to_mod_split = to_mod_root.edge.split_bitmask

    to_consume_root = to_consume.seed_node
    to_consume_split = to_consume_root.edge.split_bitmask

    leaf_intersection = to_mod_split & to_consume_split
    if _IS_DEBUG_LOGGING:
        _LOG.debug("add_to_scm:\n  %s\n  + %s\n%s" % (str(to_modify), str(to_consume), format_split(leaf_intersection, taxon_set=taxon_set)))

    n_common_leaves = count_bits(leaf_intersection)
    if n_common_leaves < 2:
        _LOG.error('trees must have at least 2 common leaves')
        raise ValueError('trees must have at least 2 common leaves')
    if n_common_leaves == 2:
        # SCM with 2 leaves in common results in a polytomy
        collapse_clade(to_mod_root)
        collapse_clade(to_consume_root)
        leaves_to_steal = [c for c in to_consume_root.child_nodes() if not (leaf_intersection & c.edge.split_bitmask)]
        for leaf in leaves_to_steal:
            to_mod_root.add_child(leaf)
            to_mod_root.edge.split_bitmask |= leaf.edge.split_bitmask
        to_modify.split_edges = {to_mod_root.edge.split_bitmask : to_mod_root.edge}
        for child in to_mod_root.child_nodes():
            to_modify.split_edges[child.edge.split_bitmask] = child.edge
        return

    # at least 3 leaves in common
    # Taken before rerooting; the same mapping object is read after the
    # reroot and is later pruned by _collapse_paths_not_found.
    tmse = to_modify.split_edges

    if not rooted:
        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        reroot_on_lowest_common_index_path(to_modify, leaf_intersection)
        reroot_on_lowest_common_index_path(to_consume, leaf_intersection)

        if _IS_DEBUG_LOGGING:
            to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
            to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)

        to_mod_root = to_modify.seed_node
        assert(to_mod_root.edge.split_bitmask == to_mod_split)
        to_consume_root = to_consume.seed_node
        assert(to_consume_root.edge.split_bitmask == to_consume_split)

    # For each tree: root->tip edge paths keyed by the split restricted to
    # the shared leaves.
    to_mod_relevant_splits = _edges_by_masked_split(tmse, leaf_intersection)
    to_consume_relevant_splits = _edges_by_masked_split(to_consume.split_edges, leaf_intersection)
    if _IS_DEBUG_LOGGING:
        to_modify.debug_check_tree(check_splits=True, logger_obj=_LOG)
        to_consume.debug_check_tree(check_splits=True, logger_obj=_LOG)


    # first we'll collapse all paths in the common leafset in to_modify that
    #   are not in to_consume
    _collapse_paths_not_found(to_mod_relevant_splits, to_consume_relevant_splits, tmse)
    # Now we'll collapse all paths in the common leafset in to_consume that
    #   are not in to_modify
    _collapse_paths_not_found(to_consume_relevant_splits, to_mod_relevant_splits)


    # first we'll deal with subtrees that are:
    #       - not in the leaf intersection set, and
    #       - attached to "relevant" nodes
    # We simply move these subtrees from the to_consume tree to the appropriate
    #   node in to_modify
    to_steal = [i for i in to_consume_root.child_nodes() if (i.edge.split_bitmask & leaf_intersection) == 0]
    for child in to_steal:
        to_mod_root.add_child(child)
        to_mod_root.edge.split_bitmask |= child.edge.split_bitmask

    for masked_split, to_consume_path in to_consume_relevant_splits.items():
        to_mod_path = to_mod_relevant_splits.get(masked_split)
        if _IS_DEBUG_LOGGING and to_mod_path is None:
            # Log the context before the assertion below fails.
            _LOG.debug("%s = mask" % format_split(leaf_intersection, taxon_set=taxon_set))
            _LOG.debug("%s = masked" % format_split(masked_split, taxon_set=taxon_set))
            _LOG.debug("%s = raw" % format_split(to_consume_path[-1].split_bitmask, taxon_set=taxon_set))
            for k in to_mod_relevant_splits:
                _LOG.debug("%s in to_mod_relevant_splits" % format_split(k, taxon_set=taxon_set))

        assert to_mod_path is not None
        to_mod_head = to_mod_path[-1].head_node
        to_mod_head_edge = to_mod_head.edge
        to_consume_head = to_consume_path[-1].head_node
        for child in to_consume_head.child_nodes():
            if (child.edge.split_bitmask & leaf_intersection) == 0:
                # child is the root of a subtree that has no children in the leaf_intersection
                to_mod_head.add_child(child)
                to_mod_head_edge.split_bitmask |= child.edge.split_bitmask
        if len(to_consume_path) > 1:
            if len(to_mod_path) > 1:
                # collision
                if gordons_supertree:
                    for edge in to_mod_path[2:]:
                        p = edge.tail_node
                        c = edge.head_node
                        sibs = p.child_nodes()
                        for sib in sibs:
                            _LOG.debug("sib is %s" % (sib.compose_newick()))
                            if sib is not c:
                                if not sib.is_leaf():
                                    collapse_clade(sib)
                                    collapse_edge(sib.edge)
                        collapse_edge(p.edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            _LOG.debug("child is %s" % (child.compose_newick()))
                            if child is not avoid:
                                mid_node.add_child(child)
                                collapse_clade(child)
                                if not child.is_leaf():
                                    collapse_edge(child.edge)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
                else:
                    for edge in to_mod_path[1:-1]:
                        collapse_edge(edge)
                    mid_node = to_mod_path[0].head_node
                    for edge in to_consume_path[1:]:
                        p = edge.tail_node
                        avoid = edge.head_node
                        for child in p.child_nodes():
                            if child is not avoid:
                                mid_node.add_child(child)
                                mid_node.edge.split_bitmask |= child.edge.split_bitmask
            else:
                # we have to move the subtrees from to_consume to to_modify
                to_mod_edge = to_mod_path[0]
                to_mod_tail, to_mod_head = to_mod_edge.tail_node, to_mod_edge.head_node
                deepest_edge_to_move = to_consume_path[0]
                deepest_node_to_move = deepest_edge_to_move.head_node
                tipmost_edge_to_move = to_consume_path[-1]
                tipmost_node_to_move = tipmost_edge_to_move.tail_node
                prev_head = tipmost_edge_to_move.head_node

                # Insert the to_consume path between to_mod_tail and
                # to_mod_head.
                to_mod_tail.add_child(deepest_node_to_move)
                to_mod_tail.remove_child(to_mod_head)
                tipmost_node_to_move.add_child(to_mod_head)
                tipmost_node_to_move.remove_child(prev_head)
    encode_splits(to_modify)