Example #1
0
    def _reroot_helper(self, gtree, newCopy=True, returnEdge=False):
        """
        Yields rerooted trees.
        Adapted from phylo.recon_root.

        gtree      -- gene tree to reroot
        newCopy    -- if True, work on gtree.copy() instead of gtree itself
        returnEdge -- if True, yield (tree, edge) pairs, where edge is the
                      pair of nodes collected for that rooting; otherwise
                      yield only the tree

        Nothing is yielded for a tree with exactly two leaves.

        NOTE: the same tree object is yielded on every iteration and is
        rerooted again (with newCopy=False) before the next yield, so a
        caller that wants to keep a particular rooting must copy the tree
        it receives.
        """

        # make a consistent unrooted copy of gene tree
        if newCopy:
            gtree = gtree.copy()

        # End the generator with a plain return.  Raising StopIteration
        # inside a generator body is converted to RuntimeError by PEP 479
        # (Python 3.7+); return behaves the same on every Python version.
        if len(gtree.leaves()) == 2:
            return

        # remember the root name so the first rerooted tree can reuse it
        oldroot = gtree.root.name
        treelib.unroot(gtree, newCopy=False)
        # root at the parent of the alphabetically first leaf so that the
        # starting point does not depend on the input rooting
        treelib.reroot(gtree,
                       gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name,
                       onBranch=False, newCopy=False)

        # make rerooting order consistent using hash ordering
        phylo.hash_order_tree(gtree, self.gene2species)

        # get list of edges to root on
        # (an internal node's edge is recorded twice: once before and once
        # after its subtree has been walked)
        edges = []
        def walk(node):
            edges.append((node, node.parent))
            if not node.is_leaf():
                node.recurse(walk)
                edges.append((node, node.parent))
        for child in gtree.root.children:
            walk(child)

        # try initial root
        treelib.reroot(gtree, edges[0][0].name, newCopy=False)
        gtree.rename(gtree.root.name, oldroot)
        if returnEdge:
            yield gtree, edges[0]
        else:
            yield gtree
        rootedge = sorted(edges[0])

        # try rerooting on everything
        for edge in edges[1:]:
            # skip an edge equal (ignoring orientation) to the previous one
            if sorted(edge) == rootedge:
                continue
            rootedge = sorted(edge)

            # earlier rerootings may have flipped the parent/child direction
            # recorded in edges, so orient the pair as (child, parent)
            node1, node2 = edge
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2, "%s %s" % (node1.name, node2.name)

            # new root and cost
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)
            if returnEdge:
                yield gtree, edge
            else:
                yield gtree
Example #2
0
def _test_prog_infsites():

    make_clean_dir("test/data/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/data/test_prog_infsites/0""")

    make_clean_dir("test/data/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/data/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/data/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/data/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/data/test_prog_infsites/0.sites")
    print "names", sites.names
    print

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        for pos, col in sites.iter_region(block[0]+1, block[1]+1):
            assert block[0]+1 <= pos <= block[1]+1, (block, pos)
            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                noncompats.append(pos)
                print "noncompat", block, pos, col
                print phylo.hash_tree(tree)
                print tree.leaf_names()
                print "".join(col[sites.names.index(name)]
                              for name in tree.leaf_names())
                print split
                print
    print "num noncompats", len(noncompats)
Example #3
0
def _test_prog_infsites():

    make_clean_dir("test/tmp/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/tmp/test_prog_infsites/0""")

    make_clean_dir("test/tmp/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/tmp/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/tmp/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/tmp/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/tmp/test_prog_infsites/0.sites")
    print "names", sites.names
    print

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        for pos, col in sites.iter_region(block[0] + 1, block[1] + 1):
            assert block[0] + 1 <= pos <= block[1] + 1, (block, pos)
            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                noncompats.append(pos)
                print "noncompat", block, pos, col
                print phylo.hash_tree(tree)
                print tree.leaf_names()
                print "".join(col[sites.names.index(name)]
                              for name in tree.leaf_names())
                print split
                print
    print "num noncompats", len(noncompats)