def read_nexus_trees(infile):
    """Iterate over the trees stored in a Nexus trees run file.

    The input is scanned up to the first line containing "translate".  The
    lines after it are read as ``<number> <name>`` pairs (commas and
    semicolons are dropped) until the first line containing "tree rep".
    Every consecutive "tree rep" line is then parsed as newick text (the
    part after the first "=") and its leaves are renamed through the
    translate table.  Reading stops at the first line without "tree rep".

    infile -- iterable of text lines, e.g. an open file

    Yields one tree per "tree rep" line, as returned by
    treelib.parse_newick.  Empty input yields nothing.

    Raises ValueError if a non-blank translate line does not hold exactly
    two fields, IndexError if a "tree rep" line has no "=", and KeyError if
    a leaf name is missing from the translate table.
    """
    infile = iter(infile)

    # Stays None only when the input has no lines at all.
    line = None

    # skip until translate
    for line in infile:
        if "translate" in line:
            break

    # read translate
    names = {}
    for line in infile:
        if "tree rep" in line:
            break
        fields = line.strip().replace(",", "").replace(";", "").split()
        if not fields:
            # Blank line, or a lone ";" closing the translate block.
            continue
        num, name = fields
        names[num] = name

    if line is None:
        return

    # read trees; the line that ended the translate loop is replayed first
    for line in chain([line], infile):
        if "tree rep" not in line:
            break

        # Split only once so "=" inside the newick text is kept.
        tree = treelib.parse_newick(line.split("=", 1)[1])

        for oldname in tree.leaf_names():
            tree.rename(oldname, names[oldname])

        yield tree
def test2(self):
    """Draw the treefile2 / breconfile2 reconciliation to an SVG file.

    Parses the species tree from ``stree_newick``, reads the gene tree and
    its branch reconciliation, and writes ``tree.svg`` into the output
    directory after passing that directory to make_clean_dir.
    """
    outdir = 'test/tmp/test_vistrans/Vis_test2/'
    make_clean_dir(outdir)

    species_tree = treelib.parse_newick(stree_newick)
    gene_tree = treelib.read_tree(treefile2)
    reconciliation = phylo.read_brecon(breconfile2, gene_tree, species_tree)

    svg_path = outdir + "tree.svg"
    transsvg.draw_tree(gene_tree, reconciliation, species_tree,
                       filename=svg_path)
def draw_raxml_tree(tr, adef):
    """Draw a RAxML tree as text on standard output.

    tr, adef -- passed unchanged to raxml.tree_to_string

    The tree is converted to a newick string, parsed, unrooted and drawn
    with minlen=5 and maxlen=5.  The conversion and the drawing are each
    bracketed by a util.tic / util.toc pair.
    """
    util.tic("Tree to string...")
    newick_text = raxml.tree_to_string(tr, adef)
    util.toc()

    util.tic("Drawing tree...")
    parsed = treelib.parse_newick(newick_text)
    unrooted = treelib.unroot(parsed)
    treelib.draw_tree(unrooted, out=sys.stdout, minlen=5, maxlen=5)
    util.toc()
def test_cdf_bmc_simple(self):
    """Check the bounded multicoalescent MRCA cdf on a 3-species tree.

    One gene is counted per species leaf; the exponential of
    coal.cdf_mrca_bounded_multicoal at T = 2000 with n = 1000 is compared
    against a fixed reference value.
    """
    stree = treelib.parse_newick("((A:1000, B:1000):500, C:1500);")
    n = 1000
    gene_counts = dict.fromkeys(stree.leaf_names(), 1)
    T = 2000

    log_cdf = coal.cdf_mrca_bounded_multicoal(gene_counts, T, stree, n)
    self.assertAlmostEqual(exp(log_cdf), 0.27719726132)
def test_1(self):
    """Run _test_multicoal_tree on a simple 4 species tree.

    Prints the first 20 rows of the resulting table and compares their
    "percent" and "prob" columns with fequals (eabs=.05).
    """
    stree = treelib.parse_newick(
        "((A:1000, B:1000):500, (C:700, D:700):800);")
    n = 500
    nsamples = 1000
    tab, tops = _test_multicoal_tree(stree, n, nsamples)

    shown_cols = ["simple_top", "percent", "prob"]
    print(repr(tab[:20].get(cols=shown_cols)))

    percent, prob = tab[:20].cget("percent", "prob")
    fequals(percent, prob, eabs=.05)
def test3(self):
    """Write and draw the treefile3 / breconfile3 reconciliation.

    Parses the species tree from ``stree_newick``, reads the gene tree and
    its branch reconciliation, adds implied speciation nodes, writes the
    reconciliation to ``brecon`` and the drawing to ``tree.svg`` in the
    output directory.
    """
    outdir = 'test/tmp/test_vistrans/Vis_test3/'
    make_clean_dir(outdir)

    stree = treelib.parse_newick(stree_newick)
    tree = treelib.read_tree(treefile3)
    brecon = phylo.read_brecon(breconfile3, tree, stree)

    phylo.add_implied_spec_nodes_brecon(tree, brecon)

    # Close the output file deterministically instead of leaking the handle.
    with open(outdir + 'brecon', 'w') as out:
        phylo.write_brecon(out, brecon)

    transsvg.draw_tree(tree, brecon, stree, filename=outdir + "tree.svg")
def _test_recon(self):
    """Run _test_bounded_multicoal_tree on a simple 4 species tree.

    Prints the first 20 rows of the resulting table and compares their
    "percent" and "prob" columns with fequals (rel=.05, eabs=.005).
    """
    stree = treelib.parse_newick(
        "((A:1000, B:1000):500, (C:700, D:700):800);")
    n = 500
    T = 2000
    nsamples = 10000
    tab, tops = _test_bounded_multicoal_tree(stree, n, T, nsamples)

    shown_cols = ["simple_top", "percent", "prob"]
    print(repr(tab[:20].get(cols=shown_cols)))

    percent, prob = tab[:20].cget("percent", "prob")
    fequals(percent, prob, rel=.05, eabs=.005)
def test_top(self):
    """Compare topology counts of two bounded multicoalescent samplers.

    For each of ``nsamples`` rounds one tree is drawn with
    coal.sample_bounded_multicoal_tree_reject and one with
    coal.sample_bounded_multicoal_tree.  Trees are keyed by
    phylo.hash_tree and counted per sampler.  The safelog counts of the two
    samplers must have a correlation above .9; a scatter plot is saved as
    ``plot.png`` in the output directory.
    """
    outdir = 'test/tmp/test_coal/BMC_test_top/'
    make_clean_dir(outdir)

    stree = treelib.parse_newick(
        "(((A:200, E:200):800, B:1000):500, (C:700, D:700):800);")
    n = 500
    T = 2000
    nsamples = 4000

    # compare top hist with simpler rejection sampling
    reject_tops = {}
    direct_tops = {}
    for _ in xrange(nsamples):
        # use rejection sampling
        rtree, rrecon = coal.sample_bounded_multicoal_tree_reject(
            stree, n, T, namefunc=lambda x: x)

        # sample tree
        dtree, drecon = coal.sample_bounded_multicoal_tree(
            stree, n, T, namefunc=lambda x: x)

        rkey = phylo.hash_tree(rtree)
        dkey = phylo.hash_tree(dtree)

        # Count each topology for its own sampler and make sure the other
        # sampler's table holds an entry (possibly zero) for it as well.
        reject_entry = reject_tops.setdefault(rkey, [0, rtree, rrecon])
        reject_entry[0] += 1
        reject_tops.setdefault(dkey, [0, dtree, drecon])

        direct_entry = direct_tops.setdefault(dkey, [0, dtree, drecon])
        direct_entry[0] += 1
        direct_tops.setdefault(rkey, [0, rtree, rrecon])

    keys = list(reject_tops)
    x = [safelog(reject_tops[key][0], default=0) for key in keys]
    y = [safelog(direct_tops[key][0], default=0) for key in keys]

    self.assertTrue(stats.corr(x, y) > .9)

    plot = Gnuplot()
    plot.enableOutput(False)
    plot.plot(x, y)
    # diagonal reference line
    plot.plot([min(x), max(x)], [min(x), max(x)], style="lines")
    plot.enableOutput(True)
    plot.save(outdir + 'plot.png')
def test_draw_tree(self):
    """Draw a small 4-leaf tree as text and compare it to a fixed drawing."""
    newick = "((A:10,B:1):5,(C:2,D:3):5);"
    parsed = treelib.parse_newick(newick)

    buf = StringIO()
    treelib.draw_tree(parsed, scale=1, spacing=2, out=buf,
                      labelOffset=-1, minlen=1)
    drawing = buf.getvalue()

    # NOTE(review): this literal is reproduced exactly as it appears in the
    # file; it looks like a multi-line drawing whose line breaks were
    # collapsed -- confirm against the upstream test before relying on it.
    expected = '''\ /--------- A /----+ | \ B ++ | /- C \----+ \-- D '''

    self.assertEqual(drawing, expected)
def test_flies(self):
    """Run _test_multicoal_tree on a 12-leaf fly species tree.

    Every node's ``dist`` is multiplied by 1e6 * 10 before sampling.  The
    first 20 rows of the resulting table are printed and their "percent"
    and "prob" columns compared with fequals (eabs=.05).
    """
    # Adjacent literals concatenate to the same single-line newick text.
    stree = treelib.parse_newick(
        "( ( ( ( ( ( dmel:5.32, ( dsec:1.89, dsim:1.89 ):3.43 ):5.91, "
        "( dere:8.57, dyak:8.57 ):2.66 ):42.17, dana:53.40 ):2.40, "
        "( dpse:1.37, dper:1.37 ):54.43 ):6.69, dwil:62.49 ):1.02, "
        "( ( dmoj:32.74, dvir:32.74 ):4.37, dgri:37.11 ):26.40 );")

    for node in stree:
        node.dist *= 1e6 * 10

    n = 10e6
    nsamples = 5000
    tab, tops = _test_multicoal_tree(stree, n, nsamples)

    shown_cols = ["simple_top", "percent", "prob"]
    print(repr(tab[:20].get(cols=shown_cols)))

    percent, prob = tab[:20].cget("percent", "prob")
    fequals(percent, prob, eabs=.05)
def test(self):
    """Check that an SPR search step changes the tree and revert undoes it.

    One random SPR move is applied first and the tree validated with
    treelib.assert_tree.  Then, 100 times, a phylo.TreeSearchSpr step must
    change the tree's hash and ``revert`` must restore the previous hash.
    """
    tree = parse_newick("((a,b),((c,d),(e,f)))")

    spr_a, spr_b = phylo.propose_random_spr(tree)
    phylo.perform_spr(tree, spr_a, spr_b)
    treelib.assert_tree(tree)

    for _ in xrange(100):
        before = phylo.hash_tree(tree)

        search = phylo.TreeSearchSpr(tree)
        search.next()
        after = phylo.hash_tree(tree)
        self.assertNotEqual(before, after)

        search.revert()
        self.assertEqual(phylo.hash_tree(tree), before)
def test_cdf_bmc(self):
    """Compare the bounded multicoalescent MRCA cdf with a sampled estimate.

    The exponential of coal.cdf_mrca_bounded_multicoal at T = 2000 is
    compared, via fequal with .05, against the fraction of 5000 trees from
    coal.sample_multicoal_tree whose root timestamp is below T.
    """
    stree = treelib.parse_newick(
        "((A:1000, B:1000):500, (C:700, D:700):800);")
    n = 1000
    gene_counts = dict.fromkeys(stree.leaf_names(), 1)
    T = 2000

    p = exp(coal.cdf_mrca_bounded_multicoal(gene_counts, T, stree, n))

    def sampled_root_time():
        # Draw one tree and return the timestamp of its root.
        tree, _ = coal.sample_multicoal_tree(stree, n)
        return treelib.get_tree_timestamps(tree)[tree.root]

    nsamples = 5000
    below = sum(1 for _ in xrange(nsamples) if sampled_root_time() < T)
    p2 = below / float(nsamples)

    fequal(p, p2, .05)
def test(self):
    """Check split extraction and Robinson-Foulds error on small trees.

    Covers three cases: rooted splits of two 3-leaf trees that differ only
    in child order, unrooted splits and a zero error for two 4-leaf trees,
    and an error of 2/3 for two 6-leaf trees.
    """
    # unittest assertions are used instead of bare ``assert`` so the checks
    # still run under ``python -O`` and report both operands on failure.
    tree1 = parse_newick("((a,b),c)")
    tree2 = parse_newick("(c,(a,b))")
    self.assertEqual(phylo.find_splits(tree1, rooted=True),
                     phylo.find_splits(tree2, rooted=True))

    tree1 = parse_newick("((a,b),(c,d))")
    tree2 = parse_newick("(((c,d),a),b)")
    self.assertEqual(phylo.find_splits(tree1), phylo.find_splits(tree2))
    self.assertEqual(phylo.robinson_foulds_error(tree1, tree2), 0.0)

    tree1 = parse_newick("(((a,b),(c,d)),(e,f))")
    tree2 = parse_newick("(((a,c),(b,d)),(e,f))")
    self.assertAlmostEqual(phylo.robinson_foulds_error(tree1, tree2),
                           2 / 3.)
def test(self):
    """Exercise phylo.find_splits and phylo.robinson_foulds_error.

    Three tree pairs are checked: equal rooted splits for two 3-leaf trees,
    equal unrooted splits with zero error for two 4-leaf trees, and an
    error of 2/3 for two 6-leaf trees.
    """
    # Bare ``assert`` is removed by ``python -O`` and gives no diagnostics;
    # the TestCase assertion methods do neither.
    first = parse_newick("((a,b),c)")
    second = parse_newick("(c,(a,b))")
    rooted_first = phylo.find_splits(first, rooted=True)
    rooted_second = phylo.find_splits(second, rooted=True)
    self.assertEqual(rooted_first, rooted_second)

    first = parse_newick("((a,b),(c,d))")
    second = parse_newick("(((c,d),a),b)")
    self.assertEqual(phylo.find_splits(first), phylo.find_splits(second))
    self.assertEqual(phylo.robinson_foulds_error(first, second), 0.0)

    first = parse_newick("(((a,b),(c,d)),(e,f))")
    second = parse_newick("(((a,c),(b,d)),(e,f))")
    self.assertAlmostEqual(
        phylo.robinson_foulds_error(first, second), 2/3.)
def parse_tree(text, names=None):
    """Parse a newick string and pass the tree through rename_tree.

    text  -- newick text, parsed with ``parse_tree_data`` as the read_data
             callback
    names -- forwarded to rename_tree (default None)

    Returns whatever rename_tree returns.
    """
    parsed = treelib.parse_newick(text, read_data=parse_tree_data)
    renamed = rename_tree(parsed, names)
    return renamed
def draw_raxml_tree(self, *args, **kargs):
    """Draw the RAxML tree held in ``self.tr``.

    ``self.adef`` and ``self.tr`` must have been defined beforehand.  The
    tree is converted to a newick string, parsed and unrooted; all
    positional and keyword arguments are forwarded to treelib.draw_tree.
    """
    newick_text = raxml.tree_to_string(self.tr, self.adef)
    unrooted = treelib.unroot(treelib.parse_newick(newick_text))
    treelib.draw_tree(unrooted, *args, **kargs)
def test_enum_recon(self):
    """Recon enumeration should always produce valid recons.

    Every (recon, events) pair from phylo.enum_recon is checked with
    phylo.assert_recon and compared, by node name, with the expected
    mapping at the same position.
    """
    # In every expected recon the leaves a-d and node 1 map to themselves;
    # only the images of nodes 3 and 2 differ: (image of 3, image of 2).
    recon_variants = [
        (3, 2), (3, 2), (3, 2), (3, 1), (3, 1), (1, 1),
        (3, 2), (1, 2), (3, 2), (1, 2), (3, 2), (3, 1),
        (3, 1), (1, 1), (3, 2), (1, 2), (3, 2), (1, 2),
    ]
    expected_recons = []
    for image3, image2 in recon_variants:
        expected_recons.append(
            {'a': 'a', 1: 1, 'c': 'c', 3: image3,
             'd': 'd', 2: image2, 'b': 'b'})

    # Leaves are always 'gene'; the events at nodes 1, 3 and 2 differ:
    # (event at 1, event at 3, event at 2).
    event_variants = [
        ('spec', 'spec', 'spec'),
        ('dup', 'spec', 'spec'),
        ('dup', 'spec', 'dup'),
        ('dup', 'spec', 'dup'),
        ('dup', 'dup', 'dup'),
        ('dup', 'dup', 'dup'),
        ('dup', 'dup', 'dup'),
        ('dup', 'dup', 'dup'),
        ('dup', 'dup', 'spec'),
        ('dup', 'dup', 'spec'),
        ('spec', 'spec', 'dup'),
        ('spec', 'spec', 'dup'),
        ('spec', 'dup', 'dup'),
        ('spec', 'dup', 'dup'),
        ('spec', 'dup', 'dup'),
        ('spec', 'dup', 'dup'),
        ('spec', 'dup', 'spec'),
        ('spec', 'dup', 'spec'),
    ]
    expected_events = []
    for event1, event3, event2 in event_variants:
        expected_events.append(
            {'a': 'gene', 1: event1, 'c': 'gene', 3: event3,
             'd': 'gene', 2: event2, 'b': 'gene'})

    tree = parse_newick("((a,b),(c,d))")
    stree = parse_newick("((a,b),(c,d))")
    gene2species = lambda x: x

    enumerated = phylo.enum_recon(
        tree, stree, depth=None, gene2species=gene2species)
    for index, (recon, events) in enumerate(enumerated):
        phylo.assert_recon(tree, stree, recon)

        # Key both mappings by node name so they compare to the tables.
        recon_names = {}
        for node, snode in recon.items():
            recon_names[node.name] = snode.name
        event_names = {}
        for node, event in events.items():
            event_names[node.name] = event

        self.assertEqual(recon_names, expected_recons[index])
        self.assertEqual(event_names, expected_events[index])
def test_enum_recon(self):
    """Recon enumeration should always produce valid recons.

    Each (recon, events) pair yielded by phylo.enum_recon is validated with
    phylo.assert_recon and then compared, keyed by node name, with the
    expected entry at the same index.
    """
    def make_recon(image3, image2):
        # Leaves a-d and node 1 always map to themselves.
        return {'a': 'a', 1: 1, 'c': 'c', 3: image3,
                'd': 'd', 2: image2, 'b': 'b'}

    def make_events(event1, event3, event2):
        # Leaves a-d are always 'gene'.
        return {'a': 'gene', 1: event1, 'c': 'gene', 3: event3,
                'd': 'gene', 2: event2, 'b': 'gene'}

    expected_recons = [
        make_recon(3, 2),
        make_recon(3, 2),
        make_recon(3, 2),
        make_recon(3, 1),
        make_recon(3, 1),
        make_recon(1, 1),
        make_recon(3, 2),
        make_recon(1, 2),
        make_recon(3, 2),
        make_recon(1, 2),
        make_recon(3, 2),
        make_recon(3, 1),
        make_recon(3, 1),
        make_recon(1, 1),
        make_recon(3, 2),
        make_recon(1, 2),
        make_recon(3, 2),
        make_recon(1, 2),
    ]

    expected_events = [
        make_events('spec', 'spec', 'spec'),
        make_events('dup', 'spec', 'spec'),
        make_events('dup', 'spec', 'dup'),
        make_events('dup', 'spec', 'dup'),
        make_events('dup', 'dup', 'dup'),
        make_events('dup', 'dup', 'dup'),
        make_events('dup', 'dup', 'dup'),
        make_events('dup', 'dup', 'dup'),
        make_events('dup', 'dup', 'spec'),
        make_events('dup', 'dup', 'spec'),
        make_events('spec', 'spec', 'dup'),
        make_events('spec', 'spec', 'dup'),
        make_events('spec', 'dup', 'dup'),
        make_events('spec', 'dup', 'dup'),
        make_events('spec', 'dup', 'dup'),
        make_events('spec', 'dup', 'dup'),
        make_events('spec', 'dup', 'spec'),
        make_events('spec', 'dup', 'spec'),
    ]

    tree = parse_newick("((a,b),(c,d))")
    stree = parse_newick("((a,b),(c,d))")
    gene2species = lambda x: x

    position = 0
    for recon, events in phylo.enum_recon(
            tree, stree, depth=None, gene2species=gene2species):
        phylo.assert_recon(tree, stree, recon)

        recon_names = dict(
            [(node.name, snode.name) for node, snode in recon.items()])
        event_names = dict(
            [(node.name, event) for node, event in events.items()])

        self.assertEqual(recon_names, expected_recons[position])
        self.assertEqual(event_names, expected_events[position])
        position += 1