Example #1
0
def get_backbone_tree(tree1, tree2):
    """Constrain a copy of tree1 to the leaf set it shares with tree2

    tree1 itself is not altered by this function; all pruning is done
    on a deep copy of it.

    Parameters
    ----------
    tree1 : dendropy tree object
        tree to be restricted
    tree2 : dendropy tree object
        tree supplying the second leaf set

    Returns
    -------
    backbone : dendropy tree object
        deep copy of tree1 retaining only the leaves whose labels are
        also found in tree2, moved onto a fresh taxon namespace built
        from those labels, with bipartitions encoded

    """
    backbone = deepcopy(tree1)

    # Labels present in both trees
    common = list(
        njmergepair.get_leaf_set(backbone).intersection(
            njmergepair.get_leaf_set(tree2)))

    common_namespace = dendropy.TaxonNamespace(common)

    # Prune, then re-home the pruned tree on the reduced namespace
    backbone.retain_taxa_with_labels(common)
    backbone.migrate_taxon_namespace(common_namespace)
    backbone.encode_bipartitions()

    return backbone
Example #2
0
def map_splits_to_node_list(big_tree, lil_tree):
    """Group the nodes of big tree by the split they induce in little tree

    NOTE: Because little tree is *contained* within big tree, more
          than one node in big tree can be mapped to the same split.

    Parameters
    ----------
    big_tree : dendropy tree object
    lil_tree : dendropy tree object

    Returns
    -------
    nodes_by_split : python dictionary
        keys - bitmasks returned by lil_tree.taxon_namespace.taxa_bitmask
               for the leaf labels below a node of big tree that are
               also leaves of little tree
        values - lists of big tree nodes sharing that bitmask, in the
                 order they are visited by a post-order traversal

    """
    small_labels = njmergepair.get_leaf_set(lil_tree)

    nodes_by_split = {}
    for big_node in big_tree.postorder_node_iter():
        labels_below = njmergepair.get_leaf_set(big_node)
        common = list(labels_below.intersection(small_labels))

        # Nodes with no leaf from little tree below them induce no split
        if not common:
            continue

        bitmask = lil_tree.taxon_namespace.taxa_bitmask(labels=common)
        nodes_by_split.setdefault(bitmask, []).append(big_node)

    return nodes_by_split
Example #3
0
def dscmcombine(workdir, trees, mstmat, outfile):
    """Merge pairwise trees along the edges of a graph and write the result

    A graph is built from mstmat and its edges are consumed one at a
    time. For each edge (i, j) the tree file named by
    name_treepair_file for trees[i] and trees[j] is read; the first one
    becomes the combined tree and every later one is merged into it
    with combine_two_trees_via_dscm.

    Parameters
    ----------
    workdir : str
        working or output directory
    trees : sequence
        indexed by graph node; trees[i] and trees[j] are handed to
        name_treepair_file to locate the tree for the pair (i, j)
    mstmat : graph data accepted by networkx.Graph
        presumably the matrix of a minimum spanning tree over the
        entries of trees -- confirm against the caller
    outfile : str
        path of the newick file to write

    Returns
    -------
    Nothing

    Raises
    ------
    Exception
        if the graph has no edge, so that no tree was ever combined

    """
    # Turn matrix into graph
    graph = networkx.Graph(mstmat)

    # Merge trees
    combined_tree = None
    nodes = list(deepcopy(graph.nodes()))
    root = None
    next_roots = [nodes[0]]
    while len(nodes) > 0:
        if root is None:
            root = next_roots[0]

        # Materialize the neighbors: newer networkx versions return an
        # iterator here, which supports neither len() nor indexing and
        # would be exhausted by the set() call below.
        neighbors = list(graph.neighbors(root))
        next_roots = list(set(next_roots).union(set(neighbors)))

        if not neighbors:
            # Every edge at this node has been consumed -- retire it
            nodes.remove(root)
            next_roots.remove(root)
            root = None
            continue

        neighbor = neighbors[0]
        sys.stdout.write("Combining %d and %d...\n" % (root, neighbor))

        # Try the pair file with the smaller index first, then the
        # reverse order if that file does not exist
        i, j = sorted((root, neighbor))
        tijfile = name_treepair_file(workdir, trees[i], trees[j])
        if not os.path.exists(tijfile):
            tijfile = name_treepair_file(workdir, trees[j], trees[i])

        tij = dendropy.Tree.get(path=tijfile, schema="newick")
        if combined_tree is None:
            combined_tree = tij
        else:
            combine_two_trees_via_dscm(tij, combined_tree)
            combined_tree.update_bipartitions()

        sys.stdout.write("...combined tree has %d leaves!\n" %
                         len(njmergepair.get_leaf_set(combined_tree)))

        # Consume the edge so that it is not processed again
        graph.remove_edge(root, neighbor)

    if combined_tree is None:
        # Fail before touching outfile rather than leave an empty file
        raise Exception("No pair of trees was combined!\n")

    with open(outfile, 'w') as f:
        # Drop the first five characters of the newick string
        # (presumably the rooting annotation prefix -- TODO confirm)
        f.write(combined_tree.as_string(schema="newick")[5:])
Example #4
0
def combine_two_trees_via_dscm(tree_AB, tree_BC):
    """Combines two trees via distance-based strict consensus merger

    A is the subset of leaves only in tree AB
    C is the subset of leaves only in tree BC
    B is the subset of leaves in both tree AB and tree BC

    Tree AB and tree BC must be equivalent on their shared leaf set B

    Both input trees are modified: each is marked as rooted, rerooted
    at the edge above the same shared leaf, and has edge lengths
    rewritten. Subtrees taken from tree AB are detached from it and
    attached inside tree BC, so tree AB is likely unusable afterwards.

    Parameters
    ----------
    tree_AB : dendropy tree object
    tree_BC : dendropy tree object

    Returns
    -------
    tree_BC : dendropy tree object
        tree BC with leaves from set A added
        NOTE(review): no return statement is visible in this section
        of the file; tree BC is updated in place.

    Raises
    ------
    Exception
        if no backbone tree is obtained, or if a collision is found on
        a path whose total length is zero in either tree
    SystemExit
        via sys.exit if a node on a mapped path has more than one
        sibling (tree is not binary)

    """
    # Taxon namespace built from the shared leaf labels; tree BC is
    # migrated onto it at the very end of this function
    data = list(
        njmergepair.get_leaf_set(tree_AB).intersection(
            njmergepair.get_leaf_set(tree_BC)))
    taxa = dendropy.TaxonNamespace(data)

    # [incompatible] = are_two_trees_incompatible(tree_AB, tree_BC)
    # if incompatible:
    #     sys.exit("Input trees are not compatible!\n")

    # Copy of tree AB restricted to the shared leaves
    backbone_tree = get_backbone_tree(tree_AB, tree_BC)
    if backbone_tree is None:
        raise Exception("Unable to extract a backbone tree!\n")

    # Root all trees at the same shared leaf --
    # required for split mapping and post-order traversal to work!
    # The leaf used is the first taxon in the backbone's namespace.
    root = backbone_tree.taxon_namespace[0].label

    node_XX = backbone_tree.find_node_with_taxon_label(root)
    node_AB = tree_AB.find_node_with_taxon_label(root)
    node_BC = tree_BC.find_node_with_taxon_label(root)

    # Remember half of the length of the edge above the rooting leaf
    # (taken before rerooting); it is written back to both sides of
    # the new root below
    elen_AB = node_AB.edge.length / 2.0
    elen_BC = node_BC.edge.length / 2.0

    backbone_tree.is_rooted = True
    tree_AB.is_rooted = True
    tree_BC.is_rooted = True

    backbone_tree.reroot_at_edge(node_XX.edge)
    tree_AB.reroot_at_edge(node_AB.edge)
    tree_BC.reroot_at_edge(node_BC.edge)

    # Give the rooting leaf and its sibling half of the original edge
    # length each (the backbone's edge lengths are left as they are)
    node_AB.edge.length = elen_AB
    node_BC.edge.length = elen_BC

    node_AB.sibling_nodes()[0].edge.length = elen_AB
    node_BC.sibling_nodes()[0].edge.length = elen_BC

    # Map nodes based on splits in shared leaf set
    # Each map goes from a backbone split bitmask to the list of nodes
    # in the full tree that induce that split, listed in the post-order
    # of the full tree (index 0 is visited first, index -1 last)
    map_AB = map_splits_to_node_list(tree_AB, backbone_tree)
    map_BC = map_splits_to_node_list(tree_BC, backbone_tree)

    # Add missing taxa from AB **into** BC using the
    # distance-based SCM strategy to handle collisions
    # The node list is frozen up front; its last entry (visited last in
    # post-order) is skipped
    nodes = [n for n in backbone_tree.postorder_node_iter()]
    for node in nodes[:-1]:
        clade = njmergepair.get_leaf_set(node)
        split = backbone_tree.taxon_namespace.taxa_bitmask(labels=clade)

        # Nodes of each full tree that correspond to this backbone edge
        node_path_AB = map_AB[split]
        node_path_BC = map_BC[split]

        num_edges_AB = len(node_path_AB)
        num_edges_BC = len(node_path_BC)

        # For every node on the path, the single sibling hanging off it;
        # these siblings are the subtrees that get moved around below
        sibs_path_AB = []
        for n in node_path_AB:
            s = n.sibling_nodes()
            if len(s) > 1:
                sys.exit("Tree AB is not binary!\n")
            sibs_path_AB.append(s[0])

        sibs_path_BC = []
        for n in node_path_BC:
            s = n.sibling_nodes()
            if len(s) > 1:
                sys.exit("Tree BC is not binary!\n")
            sibs_path_BC.append(s[0])

        if num_edges_AB > 1 and num_edges_BC > 1:
            # Found a collision -- add edges from tree AB to tree BC
            # Find normalization factor for path in AB
            # ALSO compute the point of attachment for edges in the path
            # IN ORDER to identify the ORDER in which edges should be added

            # elen_path_AB[k] is the cumulative length of the first k+1
            # edges on the AB path; elen_AB ends up as the path total.
            # NOTE: elen_AB, elen_BC, node_AB and node_BC are reused
            # here and no longer hold the rooting values set above.
            elen_AB = node_path_AB[0].edge.length
            elen_path_AB = [elen_AB]
            i_AB = 1
            for node_AB in node_path_AB[1:]:
                elen_AB = elen_AB + node_AB.edge.length
                elen_path_AB.append(elen_path_AB[i_AB - 1] +
                                    node_AB.edge.length)
                i_AB = i_AB + 1

            # Same cumulative lengths for the BC path
            elen_BC = node_path_BC[0].edge.length
            elen_path_BC = [elen_BC]
            i_BC = 1
            for node_BC in node_path_BC[1:]:
                elen_BC = elen_BC + node_BC.edge.length
                elen_path_BC.append(elen_path_BC[i_BC - 1] +
                                    node_BC.edge.length)
                i_BC = i_BC + 1

            if elen_AB == 0.0 or elen_BC == 0.0:
                raise Exception("Collision on path of length zero!\n")

            # Rescale the AB path so its total length equals that of
            # the BC path, making attachment points comparable
            norm_AB = elen_BC / elen_AB

            for i in range(len(elen_path_AB)):
                elen_path_AB[i] = elen_path_AB[i] * norm_AB

            # Extract components of tree BC
            # Detach everything below the parent of the last node on
            # the BC path; it is rebuilt and re-attached after the loop
            node_BC = node_path_BC[-1]
            sibs_BC = sibs_path_BC[-1]
            parent_BC = node_BC.parent_node
            parent_BC.clear_child_nodes()

            # Get node in tree BC and update branch length
            # The first edge gets the smaller of the (rescaled) AB and
            # BC lengths, i.e. the earlier first attachment point
            child1 = node_path_BC[0]
            elen_AB = node_path_AB[0].edge.length * norm_AB
            elen_BC = node_path_BC[0].edge.length
            if elen_AB < elen_BC:
                child1.edge.length = elen_AB
            else:
                child1.edge.length = elen_BC

            # Merge the two paths by attachment point, one new internal
            # node per iteration. i_AB / i_BC index the next sibling
            # still to be attached from each path; start holds the
            # cumulative position of the previous attachment.
            i_AB = 0
            i_BC = 0
            start = None
            while (True):
                # dothis: 1 = attach from AB, 2 = attach from BC,
                #         3 = attach from both at the same point
                dothis = None
                if i_AB < num_edges_AB - 1 and i_BC < num_edges_BC - 1:
                    if elen_path_AB[i_AB] == elen_path_BC[i_BC]:
                        # Add a new node created from AB and BC
                        dothis = 3
                    elif elen_path_AB[i_AB] < elen_path_BC[i_BC]:
                        # Add remaining edges from tree AB
                        dothis = 1
                    else:
                        # Add remaining edges from tree BC
                        dothis = 2
                elif i_AB < num_edges_AB - 1:
                    # Add remaining edges from tree AB
                    dothis = 1
                elif i_BC < num_edges_BC - 1:
                    # Add remaining edges from tree BC
                    dothis = 2
                else:
                    # No more edges to add!
                    break

                # In every case below: on the first iteration child1
                # keeps the edge length set above and start is
                # initialised from it; afterwards child1's edge length
                # becomes the distance from the previous attachment
                # point to this one.
                if dothis == 1:
                    # Adding AB only
                    child2 = sibs_path_AB[i_AB]
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_AB[i_AB]
                        child1.edge.length = stop - start
                        start = stop
                    i_AB = i_AB + 1
                elif dothis == 2:
                    # Add BC only
                    child2 = sibs_path_BC[i_BC]
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_BC[i_BC]
                        child1.edge.length = stop - start
                        start = stop
                    i_BC = i_BC + 1
                else:
                    # Add both AB and BC
                    # The two siblings are joined under a new node with
                    # a zero-length edge and attached together
                    child2 = dendropy.Node()
                    child2.set_child_nodes(
                        [sibs_path_AB[i_AB], sibs_path_BC[i_BC]])
                    child2.edge.length = 0.0
                    if start is None:
                        start = child1.edge.length
                    else:
                        stop = elen_path_BC[i_BC]
                        child1.edge.length = stop - start
                        start = stop
                    i_AB = i_AB + 1
                    i_BC = i_BC + 1

                # Detach the subtree being attached from its old parent
                child2.parent_node = None

                # Combine nodes from tree BC (node 1) and
                # tree AB or tree BC (node 2)
                next_node = dendropy.Node()
                next_node.set_child_nodes([child1, child2])

                # Set node in tree BC as next_node
                child1 = next_node

            # Recombine the three components of tree BC
            # The top edge gets whatever remains of the BC path length
            # beyond the last attachment point
            next_node.edge.length = elen_path_BC[-1] - start
            parent_BC.set_child_nodes([next_node] + [sibs_BC])
        elif num_edges_AB > 1:
            # Found edges in tree AB not in tree BC --
            # add edges from tree AB to tree BC!

            # Find normalization factor for path in AB
            # Unlike the collision case, elen_path_AB here holds the
            # individual edge lengths, not cumulative ones
            elen_BC = node_path_BC[0].edge.length
            elen_AB = 0.0
            elen_path_AB = []
            for node_AB in node_path_AB:
                elen_AB = elen_AB + node_AB.edge.length
                elen_path_AB.append(node_AB.edge.length)

            if elen_AB == 0.0:
                # Zero-length AB path: split the BC edge length evenly
                # across the AB edges
                xxxx_AB = elen_BC / num_edges_AB
                for i in range(num_edges_AB):
                    elen_path_AB[i] = xxxx_AB
            else:
                # Rescale the AB edges so they sum to the BC edge length
                norm_AB = elen_BC / elen_AB
                for i in range(num_edges_AB):
                    elen_path_AB[i] = elen_path_AB[i] * norm_AB

            # Extract components of tree BC
            # The BC path has a single node here; detach it and its
            # sibling from their parent
            node_BC = node_path_BC[0]
            sibs_BC = sibs_path_BC[0]
            parent_BC = node_BC.parent_node
            parent_BC.clear_child_nodes()

            # Get node in tree BC and update branch length to match tree AB
            child1 = node_path_BC[0]
            child1.edge.length = elen_path_AB[0]

            # Add each nodes in tree AB to tree BC
            # One new internal node per extra AB edge, each carrying
            # the sibling subtree of the AB node one step earlier on
            # the path
            for i_AB in range(1, num_edges_AB):
                # Remove parent from child 1
                child1.parent_node = None

                # Get node being added from tree AB!
                child2 = sibs_path_AB[i_AB - 1]
                child2.parent_node = None

                # Combine nodes from tree BC (node 1) and tree AB (node 2)
                next_node = dendropy.Node()
                next_node.set_child_nodes([child1, child2])
                next_node.edge.length = elen_path_AB[i_AB]

                # Set node in tree BC as next_node
                child1 = next_node

            # Recombine the three components of tree BC
            parent_BC.set_child_nodes([next_node] + [sibs_BC])
        elif num_edges_BC > 1:
            # Found edges in tree BC not in tree AB
            # Nothing to do: tree BC is left as it is
            pass
        else:
            # Found one edge in tree BC and one edge in tree AB
            # Nothing to do: tree BC is left as it is
            pass

    # Move tree BC onto the namespace built from the shared leaf labels
    tree_BC.migrate_taxon_namespace(taxa)