def _reroot_helper(self, gtree, newCopy=True, returnEdge=False): """ Yields rerooted trees. Adapted from phylo.recon_root. """ # make a consistent unrooted copy of gene tree if newCopy: gtree = gtree.copy() if len(gtree.leaves()) == 2: raise StopIteration oldroot = gtree.root.name treelib.unroot(gtree, newCopy=False) treelib.reroot(gtree, gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name, onBranch=False, newCopy=False) # make rerooting order consistent using hash ordering phylo.hash_order_tree(gtree, self.gene2species) # get list of edges to root on edges = [] def walk(node): edges.append((node, node.parent)) if not node.is_leaf(): node.recurse(walk) edges.append((node, node.parent)) for child in gtree.root.children: walk(child) # try initial root treelib.reroot(gtree, edges[0][0].name, newCopy=False) gtree.rename(gtree.root.name, oldroot) if returnEdge: yield gtree, edges[0] else: yield gtree rootedge = sorted(edges[0]) # try rerooting on everything for edge in edges[1:]: if sorted(edge) == rootedge: continue rootedge = sorted(edge) node1, node2 = edge if node1.parent != node2: node1, node2 = node2, node1 assert node1.parent == node2, "%s %s" % (node1.name, node2.name) # new root and cost treelib.reroot(gtree, node1.name, newCopy=False, keepName=True) if returnEdge: yield gtree, edge else: yield gtree
def _test_prog_infsites():
    """
    Run arg-sim and arg-sample with --infsites and report incompatible sites.

    Simulates a data set with bin/arg-sim, samples from it with
    bin/arg-sample, then reads back out.0.smc.gz and the simulated sites
    file.  For every local tree of the sampled ARG, each site in the
    tree's block is converted to a split and looked up with
    arglib.split_to_tree_branch; positions for which no branch is found
    are collected and printed, followed by their total count.

    All files are written under test/data/test_prog_infsites.  The
    function only prints diagnostics; it does not assert on the number
    of non-compatible sites.
    """

    make_clean_dir("test/data/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/data/test_prog_infsites/0""")

    make_clean_dir("test/data/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/data/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/data/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/data/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/data/test_prog_infsites/0.sites")

    # Single-argument print(...) calls emit the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("names %s" % (sites.names,))
    print("")

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        # presumably the +1 shifts block coordinates to the convention
        # used by the sites file -- TODO confirm
        for pos, col in sites.iter_region(block[0]+1, block[1]+1):
            assert block[0]+1 <= pos <= block[1]+1, (block, pos)

            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                # no branch of this local tree matches the site's split
                noncompats.append(pos)
                print("noncompat %s %s %s" % (block, pos, col))
                print(phylo.hash_tree(tree))
                print(tree.leaf_names())
                # site column reordered to follow the tree's leaf order
                print("".join(col[sites.names.index(name)]
                              for name in tree.leaf_names()))
                print(split)
                print("")

    print("num noncompats %d" % len(noncompats))
def _test_prog_infsites():
    """
    Run arg-sim and arg-sample with --infsites and report incompatible sites.

    Simulates a data set with bin/arg-sim, samples from it with
    bin/arg-sample, then reads back out.0.smc.gz and the simulated sites
    file.  For every local tree of the sampled ARG, each site in the
    tree's block is converted to a split and looked up with
    arglib.split_to_tree_branch; positions for which no branch is found
    are collected and printed, followed by their total count.

    All files are written under test/tmp/test_prog_infsites.  The
    function only prints diagnostics; it does not assert on the number
    of non-compatible sites.
    """

    make_clean_dir("test/tmp/test_prog_infsites")

    run_cmd("""bin/arg-sim \
        -k 40 -L 200000 \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 --infsites \
        --ntimes 20 --maxtime 400e3 \
        -o test/tmp/test_prog_infsites/0""")

    make_clean_dir("test/tmp/test_prog_infsites/0.sample")
    run_cmd("""bin/arg-sample \
        -s test/tmp/test_prog_infsites/0.sites \
        -N 1e4 -r 1.5e-8 -m 2.5e-8 \
        --ntimes 5 --maxtime 100e3 -c 1 \
        --climb 0 -n 20 --infsites \
        -x 1 \
        -o test/tmp/test_prog_infsites/0.sample/out""")

    arg = argweaver.read_arg(
        "test/tmp/test_prog_infsites/0.sample/out.0.smc.gz")
    sites = argweaver.read_sites("test/tmp/test_prog_infsites/0.sites")

    # Single-argument print(...) calls emit the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("names %s" % (sites.names,))
    print("")

    noncompats = []
    for block, tree in arglib.iter_local_trees(arg):
        tree = tree.get_tree()
        treelib.remove_single_children(tree)
        phylo.hash_order_tree(tree)
        # presumably the + 1 shifts block coordinates to the convention
        # used by the sites file -- TODO confirm
        for pos, col in sites.iter_region(block[0] + 1, block[1] + 1):
            assert block[0] + 1 <= pos <= block[1] + 1, (block, pos)

            split = sites_split(sites.names, col)
            node = arglib.split_to_tree_branch(tree, split)
            if node is None:
                # no branch of this local tree matches the site's split
                noncompats.append(pos)
                print("noncompat %s %s %s" % (block, pos, col))
                print(phylo.hash_tree(tree))
                print(tree.leaf_names())
                # site column reordered to follow the tree's leaf order
                print("".join(col[sites.names.index(name)]
                              for name in tree.leaf_names()))
                print(split)
                print("")

    print("num noncompats %d" % len(noncompats))