def test_birth_death_single2_sim(self): """test the single branch prior""" duprate = 2.0 lossrate = .5 T = 1.0 stree = treelib.parse_newick("(A:1,B:1);") def gene2species(gene): return gene[:1].upper() s = stree.leaves()[0] b = birthDeathCount(1, T, duprate, lossrate) # 1 tree = treelib.parse_newick("(a,b);") recon = phylo.reconcile(tree, stree, gene2species) p = birthDeathCount(1, T, duprate, lossrate) * b p2 = exp(calcBirthDeathPrior(tree, stree, recon, duprate, lossrate)) p2 *= numRedunantTopology(tree.root, gene2species) print p, p2 fequal(p, p2) # 2 tree = treelib.parse_newick("((a,a),b);") recon = phylo.reconcile(tree, stree, gene2species) p = birthDeathCount(2, T, duprate, lossrate) * b p2 = exp(calcBirthDeathPrior(tree, stree, recon, duprate, lossrate)) p2 *= numRedunantTopology(tree.root, gene2species) print p, p2 fequal(p, p2) # 3 tree = treelib.parse_newick("(((a,a),a),b);") recon = phylo.reconcile(tree, stree, gene2species) p = birthDeathCount(3, T, duprate, lossrate) * b p2 = exp(calcBirthDeathPrior(tree, stree, recon, duprate, lossrate)) p2 *= numRedunantTopology(tree.root, gene2species) print p, p2 fequal(p, p2) # 4 tree = treelib.parse_newick("(((a,a),(a,a)),b);") recon = phylo.reconcile(tree, stree, gene2species) p = birthDeathCount(4, T, duprate, lossrate) * b / 3.0 p2 = exp(calcBirthDeathPrior(tree, stree, recon, duprate, lossrate)) p2 *= numRedunantTopology(tree.root, gene2species) print p, p2 fequal(p, p2)
def _test_branch_prior_predup(self):
    """Sample the branch prior of the "predup" fly tree and report statistics.

    ``spidir.branch_prior`` is evaluated 30 times with its last positional
    argument set to True and 30 times with it set to False.  One tab-separated
    line (tree id, then mean and sdev of each sample) is written to stderr.
    The leading underscore keeps default unittest discovery from running it.
    """
    prep_dir("test/output/branch_prior_predup")

    out = sys.stderr
    treeid = "predup"

    tree = read_tree("test/data/flies.predup.tree")
    drawTree(tree)

    stree = read_tree("test/data/flies.stree")
    gene2species = phylo.read_gene2species("test/data/flies.smap")
    params = spidir.read_params("test/data/flies.param")

    birth = .4
    death = .39
    pretime = 1.0
    nsamples = 100

    recon = phylo.reconcile(tree, stree, gene2species)
    events = phylo.label_events(tree, recon)

    def draw_samples(flag):
        # 30 independent evaluations with the trailing flag fixed
        samples = []
        for _ in xrange(30):
            samples.append(
                spidir.branch_prior(tree, stree, recon, events, params,
                                    birth, death, pretime, nsamples, flag))
        return samples

    p = draw_samples(True)
    p2 = draw_samples(False)

    fields = [treeid, mean(p), sdev(p), mean(p2), sdev(p2)]
    print >> out, "\t".join([str(field) for field in fields])
def _test_branch_prior_predup(self):
    """Report branch-prior sample statistics for the "predup" fly tree.

    Two batches of 30 ``spidir.branch_prior`` evaluations are taken, differing
    only in the final positional flag (True, then False).  The tree id and the
    mean/sdev of each batch are written to stderr as one tab-separated line.
    Not collected by default unittest discovery because of the leading
    underscore in its name.
    """
    prep_dir("test/output/branch_prior_predup")

    report = sys.stderr
    treeid = "predup"

    tree = read_tree("test/data/flies.predup.tree")
    drawTree(tree)

    stree = read_tree("test/data/flies.stree")
    gene2species = phylo.read_gene2species("test/data/flies.smap")
    params = spidir.read_params("test/data/flies.param")
    birth, death = 0.4, 0.39
    pretime = 1.0
    nsamples = 100

    recon = phylo.reconcile(tree, stree, gene2species)
    events = phylo.label_events(tree, recon)

    # batches[0] uses flag True, batches[1] uses flag False (same order as
    # the sampling calls must be issued)
    batches = []
    for flag in (True, False):
        batch = [
            spidir.branch_prior(tree, stree, recon, events, params,
                                birth, death, pretime, nsamples, flag)
            for _ in xrange(30)
        ]
        batches.append(batch)
    p, p2 = batches

    row = [treeid, mean(p), sdev(p), mean(p2), sdev(p2)]
    print >> report, "\t".join(map(str, row))
def _test_branch_prior_samples(self): """Test branch prior""" prep_dir("test/output/branch_prior") treeids = os.listdir("test/data/flies") treeids = ["3"] for treeid in treeids: tree = read_tree("test/data/flies-duploss/%s/%s.tree" % (treeid, treeid)) print treeid draw_tree(tree) stree = read_tree("test/data/flies.stree") gene2species = phylo.read_gene2species("test/data/flies.smap") params = spidir.read_params("test/data/flies.param") birth = 0.0012 death = 0.0013 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) p = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True) for i in xrange(30) ] # row = [treeid, # mean(p), exc_default(lambda: sdev(p), INF)] print treeid, p
def test_branch_prior_simple1(self): """Test branch prior""" tree = treelib.parse_newick("((a1:1, b1:1):2, c1:3);") stree = treelib.parse_newick("((A:2, B:2):1, C:3);") gene2species = lambda x: x[0].upper() params = { "A": (1.0, 1.0), "B": (3.0, 3.0), "C": (4, 3.5), 2: (2.0, 2.0), 1: (1.0, 1.0), "baserate": (11.0, 10.0), } birth = 0.01 death = 0.02 pretime = 1.0 nsamples = 1 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) # pd(mapdict(recon, key=lambda x: x.name, val=lambda x: x.name)) # pd(mapdict(events, key=lambda x: x.name)) p = spidir.branch_prior( tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False, generate=1 ) tot = 0.0 gs = list(frange(0.0001, 4, 0.01)) gs = list(frange(1, 1.01, 0.01)) for g in gs: pg = invgammaPdf(g, params["baserate"]) pa = gammaPdf(tree.nodes["a1"].dist, [params["A"][0], params["A"][1] / (g * stree.nodes["A"].dist)]) pb = gammaPdf(tree.nodes["b1"].dist, [params["B"][0], params["B"][1] / (g * stree.nodes["B"].dist)]) pc = spidir.gammaSumPdf( tree.nodes["c1"].dist + tree.nodes[2].dist, 2, [params["C"][0], params[2][0]], [params["C"][1] / (g * stree.nodes["C"].dist), params[2][1] / (g * stree.nodes[2].dist)], 0.001, ) print g, pg, pa, pb, pc tot += pg * pa * pb * pc tot /= len(gs) print ( tree.nodes["c1"].dist + tree.nodes[2].dist, [params["C"][0], params[2][0]], [params["C"][1], params[2][1]], ) print "C", p print "P", log(tot)
def printMCMC(conf, i, tree, stree, gene2species, visited):
    """Write the current search state to the debug log.

    Does nothing unless ``isDebug(DEBUG_LOW)`` is true.  Otherwise the tree is
    reconciled against ``stree``, its events are labelled, and a banner with
    the iteration label ``i`` and ``len(visited)`` is emitted, followed by the
    drawn tree and two blank debug lines.

    ``conf`` is accepted for signature compatibility and is not read here.
    """
    if not isDebug(DEBUG_LOW):
        return

    recon = phylo.reconcile(tree, stree, gene2species)
    # use the snake_case name that the rest of the code base calls
    events = phylo.label_events(tree, recon)

    debug("\n=======================================")
    debug("iter:", i, " visited:", len(visited))
    drawTreeLogl(tree, events=events)
    debug()
    debug()
def test_all_terms(self):
    """Write the joint-probability terms for up to 100 fly gene families.

    For each of the first 100 entries of test/data/flies the nucleotide tree
    and alignment are read and the tree is reconciled against the normalized
    species tree.  The three terms returned by
    ``spidir.calc_joint_prob(..., terms=True)`` and their sum are written to
    test/output/all_terms/flies.txt.  Nothing is asserted.
    """
    prep_dir("test/output/all_terms")

    out = open("test/output/all_terms/flies.txt", "w")
    try:
        treeids = os.listdir("test/data/flies")[:100]

        for treeid in treeids:
            tree = read_tree("test/data/flies/%s/%s.nt.tree" %
                             (treeid, treeid))
            align = read_fasta("test/data/flies/%s/%s.nt.align" %
                               (treeid, treeid))

            print >> out, treeid
            draw_tree(tree, out=out)

            stree = read_tree("test/data/flies.norm.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.nt.param")
            birth = .4
            death = .39
            pretime = 1.0
            maxdoom = 20
            bgfreq = [.258, .267, .266, .209]
            kappa = 1.59

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)

            branchp, topp, seqlk = spidir.calc_joint_prob(
                align, tree, stree, recon, events, params,
                birth, death, pretime,
                bgfreq, kappa, maxdoom=maxdoom, terms=True)
            joint = topp + branchp + seqlk

            print >> out, "topp ", topp
            print >> out, "branchp", branchp
            print >> out, "seqlk ", seqlk
            print >> out, "joint ", joint
    finally:
        # release the report file even when a family fails to load or score
        out.close()
def test_internal_branch(self): duprate = .4 lossrate = .01 stree = treelib.parse_newick("((A:.05,B:.01):1,C:.01);") def gene2species(gene): return gene[:1].upper() tree = treelib.parse_newick("((((A1,B3),(A2,B2)),(A3,B1)),C1)") recon = phylo.reconcile(tree, stree, gene2species) #p = exp(calcBirthDeathPrior(tree, stree, recon, duprate, lossrate, # maxdoom)) p = exp(c_calcBirthDeathPrior(tree, stree, recon, duprate, lossrate)) print p, 0.0012 * 3
def evalUserTree(tree):
    """Score a user-supplied tree and record it in ``visited``.

    Branch lengths are set from the distance matrix, the tree's log
    likelihood is computed, and ``visited[hash]`` is (re)written as
    ``[logl, copy of tree, visit count]``.  When low-level debugging is on the
    reconciled tree is also drawn to the debug log.

    Relies on ``conf``, ``distmat``, ``labels``, ``stree``, ``gene2species``,
    ``params`` and ``visited`` from the enclosing scope.
    """
    setTreeDistances(conf, tree, distmat, labels)
    logl = treeLogLikelihood(conf, tree, stree, gene2species, params)

    key = phylo.hash_tree(tree)
    count = 0
    if key in visited:
        # only the running visit count of the previous entry is reused
        _old_logl, _old_tree, count = visited[key]
    visited[key] = [logl, tree.copy(), count + 1]

    if not isDebug(DEBUG_LOW):
        return

    debug("\nuser given tree:")
    recon = phylo.reconcile(tree, stree, gene2species)
    events = phylo.label_events(tree, recon)
    drawTreeLogl(tree, events=events)
def test_all_terms(self):
    """Dump topology, branch and sequence terms for up to 100 fly families.

    Each of the first 100 entries of test/data/flies contributes one record
    to test/output/all_terms/flies.txt: the family id, a drawing of its tree,
    the three terms returned by ``spidir.calc_joint_prob(..., terms=True)``
    and their sum.  Nothing is asserted.
    """
    prep_dir("test/output/all_terms")

    out = open("test/output/all_terms/flies.txt", "w")
    try:
        for treeid in os.listdir("test/data/flies")[:100]:
            family = (treeid, treeid)
            tree = read_tree("test/data/flies/%s/%s.nt.tree" % family)
            align = read_fasta("test/data/flies/%s/%s.nt.align" % family)

            print >>out, treeid
            draw_tree(tree, out=out)

            stree = read_tree("test/data/flies.norm.stree")
            gene2species = phylo.read_gene2species("test/data/flies.smap")
            params = spidir.read_params("test/data/flies.nt.param")
            birth = .4
            death = .39
            pretime = 1.0
            maxdoom = 20
            bgfreq = [.258, .267, .266, .209]
            kappa = 1.59

            recon = phylo.reconcile(tree, stree, gene2species)
            events = phylo.label_events(tree, recon)

            branchp, topp, seqlk = spidir.calc_joint_prob(
                align, tree, stree, recon, events, params,
                birth, death, pretime,
                bgfreq, kappa, maxdoom=maxdoom, terms=True)
            joint = topp + branchp + seqlk

            print >>out, "topp ", topp
            print >>out, "branchp", branchp
            print >>out, "seqlk ", seqlk
            print >>out, "joint ", joint
    finally:
        # the handle used to stay open whenever an exception escaped the loop
        out.close()
def _test_branch_prior_samples(self): """Test branch prior""" prep_dir("test/output/branch_prior") treeids = os.listdir("test/data/flies") treeids = ["3"] for treeid in treeids: tree = read_tree("test/data/flies-duploss/%s/%s.tree" % (treeid, treeid)) print treeid draw_tree(tree) stree = read_tree("test/data/flies.stree") gene2species = phylo.read_gene2species("test/data/flies.smap") params = spidir.read_params("test/data/flies.param") birth = .0012 death = .0013 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) p = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True) for i in xrange(30) ] #row = [treeid, # mean(p), exc_default(lambda: sdev(p), INF)] print treeid, p
def _test_branch_prior_approx(self): """Test branch prior""" prep_dir("test/output/branch_prior") out = open("test/output/branch_prior/flies.approx.txt", "w") out = sys.stderr treeids = os.listdir("test/data/flies") for treeid in treeids: tree = read_tree("test/data/flies-duploss/%s/%s.nt.tree" % (treeid, treeid)) print treeid draw_tree(tree) stree = read_tree("test/data/flies.stree") gene2species = phylo.read_gene2species("test/data/flies.smap") params = spidir.read_params("test/data/flies.param") birth = 0.0012 death = 0.0013 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) p = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False) for i in xrange(30) ] p2 = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True) for i in xrange(30) ] row = [treeid, mean(p), exc_default(lambda: sdev(p), INF), mean(p2), exc_default(lambda: sdev(p2), INF)] print >> out, "\t".join(map(str, row)) self.assert_(INF not in row and -INF not in row) out.close()
def test_branch_prior_simple1(self): """Test branch prior""" tree = treelib.parse_newick("((a1:1, b1:1):2, c1:3);") stree = treelib.parse_newick("((A:2, B:2):1, C:3);") gene2species = lambda x: x[0].upper() params = { "A": (1.0, 1.0), "B": (3.0, 3.0), "C": (4, 3.5), 2: (2.0, 2.0), 1: (1.0, 1.0), "baserate": (11.0, 10.0) } birth = .01 death = .02 pretime = 1.0 nsamples = 1 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) #pd(mapdict(recon, key=lambda x: x.name, val=lambda x: x.name)) #pd(mapdict(events, key=lambda x: x.name)) p = spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False, generate=1) tot = 0.0 gs = list(frange(.0001, 4, .01)) gs = list(frange(1, 1.01, .01)) for g in gs: pg = invgammaPdf(g, params["baserate"]) pa = gammaPdf( tree.nodes["a1"].dist, [params["A"][0], params["A"][1] / (g * stree.nodes["A"].dist)]) pb = gammaPdf( tree.nodes["b1"].dist, [params["B"][0], params["B"][1] / (g * stree.nodes["B"].dist)]) pc = spidir.gammaSumPdf( tree.nodes["c1"].dist + tree.nodes[2].dist, 2, [params["C"][0], params[2][0]], [ params["C"][1] / (g * stree.nodes["C"].dist), params[2][1] / (g * stree.nodes[2].dist) ], .001) print g, pg, pa, pb, pc tot += pg * pa * pb * pc tot /= len(gs) print(tree.nodes["c1"].dist + tree.nodes[2].dist, [params["C"][0], params[2][0]], [params["C"][1], params[2][1]]) print "C", p print "P", log(tot)
def getBaserate(tree, stree, params, recon=None, gene2species=None):
    """Estimate a gene tree's base rate from its reconciled branch lengths.

    The gene tree is walked from the root.  Every non-duplication node ends a
    path; for such a path the accumulated gene-tree length is paired with the
    summed ``params[snode.name][0]`` and the square root of the summed
    ``params[snode.name][1] ** 2`` over the species branches it crosses.  The
    three lists are handed to ``mleBaserate`` together with
    ``params["baserate"]``.

    Args:
        tree: gene tree.
        stree: species tree.
        params: mapping from species node name to a pair of numbers, plus a
            ``"baserate"`` entry.
        recon: gene-node to species-node mapping; computed from
            ``gene2species`` when omitted.
        gene2species: mapping function, required when ``recon`` is omitted.

    Returns:
        Whatever ``mleBaserate`` returns.

    Raises:
        AssertionError: if neither ``recon`` nor ``gene2species`` is given,
            or if a path's summed squared second parameter is ~0.
    """
    if recon is None:
        assert gene2species is not None
        recon = phylo.reconcile(tree, stree, gene2species)
    # snake_case name, matching every other caller in this code base
    events = phylo.label_events(tree, recon)

    # The result is not used here, but the call is kept: walk() below tests
    # `"extra" in node.data`, which presumably is set by this helper
    # (TODO confirm against getExtraBranches).
    getExtraBranches(tree.root, recon, events, stree)

    lens = []
    means = []
    sdevs = []

    # process each child of subtree root
    def walk(node, depths, sroot, extra):
        # save depth of node
        if recon[node] != recon[tree.root]:
            depths[node] = node.dist + depths[node.parent]
        else:
            # ignore branch length of free branches
            depths[node] = depths[node.parent]

        # record presence of extra in path
        extra = extra or ("extra" in node.data)

        if events[node] == "dup":
            # recurse within dup-only subtree
            #   therefore pass depths and sroot unaltered
            node.recurse(walk, depths, sroot, extra)
        else:
            # we are at subtree leaf

            # figure out species branches that we cross
            # get total mean and variance of this path
            mu = 0
            sigma2 = 0
            snode = recon[node]

            # branch is also free if we do not cross any more species
            # don't estimate baserates from extra branches
            if snode != sroot and not extra:
                while snode != sroot and snode != stree.root:
                    mu += params[snode.name][0]
                    sigma2 += params[snode.name][1] ** 2
                    snode = snode.parent
                assert abs(sigma2) > .00000001, "sigma too small"
                sigma = math.sqrt(sigma2)

                # save dist and params
                lens.append(depths[node])
                means.append(mu)
                sdevs.append(sigma)

            # continue recursion, but with new depths and sroot
            for child in node.children:
                walk(child, depths={node: 0}, sroot=recon[node], extra=False)

    for child in tree.root.children:
        walk(child, depths={tree.root: 0}, sroot=recon[tree.root],
             extra=False)

    return mleBaserate(lens, means, sdevs, params["baserate"])
def treeLogLikelihood_python(conf, tree, stree, gene2species, params,
                             baserate=None, integration="fastsampling"):
    """Compute a gene tree's log likelihood with the pure-Python code path.

    The total is the sum of ``subtreeLikelihood`` over the root and every
    speciation node, plus ``rareEventsLikelihood``, plus an error penalty
    (``tree.data["error"] * conf["errorcost"]``, each defaulting to 0), plus,
    when ``conf["famprob"]`` is true, the log gamma density of the base rate.

    Side effects: clears and rewrites several ``tree.data`` / node ``data``
    entries (``unfold``, ``eventlogl``, ``errorlogl``, ``baserate``,
    ``logl``), re-roots lengths via ``phylo.midroot_recon`` and overwrites
    ``params[stree.root.name]`` with ``[0, 0]``.

    Args:
        conf: configuration mapping; must contain ``"famprob"``.
        tree: gene tree to score.
        stree: species tree.
        gene2species: gene-name to species-name mapping function.
        params: per-species-branch parameters plus ``"baserate"``.
        baserate: base rate to use; estimated with ``getBaserate`` when None.
        integration: forwarded to ``subtreeLikelihood``.

    Returns:
        The total log likelihood (also stored in ``tree.data["logl"]``).
    """
    # debug info
    if isDebug(DEBUG_MED):
        util.tic("find logl")

    # derive relative branch lengths
    tree.clear_data("logl", "extra", "fracs", "params", "unfold")
    recon = phylo.reconcile(tree, stree, gene2species)
    events = phylo.label_events(tree, recon)

    # determine if top branch unfolds
    if recon[tree.root] == stree.root and events[tree.root] == "dup":
        for child in tree.root.children:
            if recon[child] != stree.root:
                child.data["unfold"] = True

    if baserate is None:
        baserate = getBaserate(tree, stree, params, recon=recon)

    phylo.midroot_recon(tree, stree, recon, events, params, baserate)

    # top branch is "free"
    params[stree.root.name] = [0, 0]
    this = util.Bundle(logl=0.0)

    # recurse through indep sub-trees
    def walk(node):
        if events[node] == "spec" or node == tree.root:
            this.logl += subtreeLikelihood(conf, node, recon, events,
                                           stree, params, baserate,
                                           integration=integration)
        node.recurse(walk)
    walk(tree.root)

    # calc probability of rare events
    tree.data["eventlogl"] = rareEventsLikelihood(conf, tree, stree,
                                                  recon, events)
    this.logl += tree.data["eventlogl"]

    # calc penalty of error
    tree.data["errorlogl"] = (tree.data.get("error", 0.0) *
                              conf.get("errorcost", 0.0))
    this.logl += tree.data["errorlogl"]

    # family rate likelihood
    if conf["famprob"]:
        this.logl += log(stats.gammaPdf(baserate, params["baserate"]))

    tree.data["baserate"] = baserate
    tree.data["logl"] = this.logl

    if isDebug(DEBUG_MED):
        util.toc()
        debug("\n\n")
        drawTreeLogl(tree, events=events)

    return this.logl
def spidir(conf, distmat, labels, stree, gene2species, params): """Main function for the SPIDIR algorithm""" setDebug(conf["debug"]) if isDebug(DEBUG_HIGH) and pyspidir: pyspidir.set_log(3, "") if "out" in conf: # create debug table conf["debugtab_file"] = file(conf["out"] + ".debug.tab", "w") debugtab = tablelib.Table(headers=["correct", "logl", "treelen", "baserate", "error", "errorlogl", "eventlogl", "tree", "topology", "species_hash"], types={"correct": bool, "logl": float, "treelen": float, "baserate": float, "error": float, "errorlogl": float, "eventlogl": float, "tree": str, "topology": str, "species_hash": str}) debugtab.writeHeader(conf["debugtab_file"]) conf["debugtab"] = debugtab else: conf["debugfile"] = None trees = [] logls = [] tree = None visited = {} util.tic("SPIDIR") # do auto searches for search in conf["search"]: util.tic("Search by %s" % search) if search == "greedy": tree, logl = Search.searchGreedy(conf, distmat, labels, stree, gene2species, params, visited=visited) elif search == "mcmc": tree, logl = Search.searchMCMC(conf, distmat, labels, stree, gene2species, params, initTree=tree, visited=visited) elif search == "regraft": tree, logl = Search.searchRegraft(conf, distmat, labels, stree, gene2species, params, initTree=tree, visited=visited, proposeFunc=Search.proposeTree3) elif search == "exhaustive": if tree == None: tree = phylo.neighborjoin(distmat, labels) tree = phylo.recon_root(tree, stree, gene2species) tree, logl = Search.searchExhaustive(conf, distmat, labels, tree, stree, gene2species, params, depth=conf["depth"], visited=visited) elif search == "hillclimb": tree, logl = Search.searchHillClimb(conf, distmat, labels, stree, gene2species, params, initTree=tree, visited=visited) elif search == "none": break else: raise SindirError("unknown search '%s'" % search) util.toc() Search.printMCMC(conf, "N/A", tree, stree, gene2species, visited) printVisitedTrees(visited) def evalUserTree(tree): setTreeDistances(conf, tree, distmat, labels) 
logl = treeLogLikelihood(conf, tree, stree, gene2species, params) thash = phylo.hash_tree(tree) if thash in visited: a, b, count = visited[thash] else: count = 0 visited[thash] = [logl, tree.copy(), count+1] if isDebug(DEBUG_LOW): debug("\nuser given tree:") recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) drawTreeLogl(tree, events=events) # eval the user given trees for treefile in conf["tree"]: tree = treelib.read_tree(treefile) evalUserTree(tree) for topfile in conf["tops"]: infile = file(topfile) strees = [] while True: try: strees.append(treelib.read_tree(infile)) except: break print len(strees) for top in strees: tree = phylo.stree2gtree(top, labels, gene2species) evalUserTree(tree) if len(conf["tops"]) > 0: printVisitedTrees(visited) # eval correcttree for debug only if "correcttree" in conf: tree = conf["correcttree"] setTreeDistances(conf, tree, distmat, labels) logl = treeLogLikelihood(conf, tree, stree, gene2species, params) if isDebug(DEBUG_LOW): debug("\ncorrect tree:") recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) drawTreeLogl(tree, events=events) util.toc() if len(visited) == 0: raise SindirError("No search or tree topologies given") if "correcthash" in conf: if conf["correcthash"] in visited: debug("SEARCH: visited correct tree") else: debug("SEARCH: NEVER saw correct tree") # return ML tree trees = [x[1] for x in visited.itervalues()] i = util.argmax([x.data["logl"] for x in trees]) return trees[i], trees[i].data["logl"]
def treeLogLikelihood(conf, tree, stree, gene2species, params, baserate=None):
    """Compute a gene tree's log likelihood, using the C extension if present.

    Falls back to ``Likelihood.treeLogLikelihood_python`` when ``pyspidir`` is
    unavailable or ``conf["python_only"]`` is set.  Otherwise the tree is
    reconciled, mid-rooted, scored by ``treeLikelihood_C``, and the error
    penalty and ``tree.data["distlogl"]`` (default 0) are added.

    Side effects: sets ``conf["bestlogl"]`` if missing, overwrites
    ``params[stree.root.name]`` with ``[0, 0]`` and writes ``unfold``,
    ``eventlogl``, ``errorlogl``, ``baserate`` and ``logl`` data entries.

    Args:
        conf: configuration mapping (``python_only``, ``generate_int`` and
            ``errorcost`` are optional).
        tree: gene tree to score.
        stree: species tree.
        gene2species: gene-name to species-name mapping function.
        params: per-species-branch parameters plus ``"baserate"``.
        baserate: base rate to use; estimated when None, or replaced by a
            sentinel when ``conf["generate_int"]`` is true.

    Returns:
        The log likelihood (also stored in ``tree.data["logl"]``).
    """
    # passed to the C code to request integration over gene rates
    integrate_rates = -99.0

    conf.setdefault("bestlogl", -util.INF)

    if pyspidir is None or conf.get("python_only", False):
        return Likelihood.treeLogLikelihood_python(
            conf, tree, stree, gene2species, params,
            baserate=baserate, integration="fastsampling")

    # debug info
    if isDebug(DEBUG_MED):
        util.tic("find logl")

    # derive relative branch lengths
    recon = phylo.reconcile(tree, stree, gene2species)
    events = phylo.label_events(tree, recon)

    # determine if top branch unfolds
    if recon[tree.root] == stree.root and events[tree.root] == "dup":
        for child in tree.root.children:
            if recon[child] != stree.root:
                child.data["unfold"] = True

    # top branch is "free"
    params[stree.root.name] = [0, 0]
    this = util.Bundle(logl=0.0)

    if conf.get("generate_int", False):
        baserate = integrate_rates
    elif baserate is None:
        baserate = Likelihood.getBaserate(tree, stree, params, recon=recon)

    phylo.midroot_recon(tree, stree, recon, events, params, baserate)

    # calc likelihood in C
    this.logl = treeLikelihood_C(conf, tree, recon, events, stree, params,
                                 baserate, gene2species)

    # calc probability of rare events
    # NOTE(review): unlike the pure-Python path, this value is stored but not
    # added to the total -- confirm whether that is intended.
    tree.data["eventlogl"] = Likelihood.rareEventsLikelihood(
        conf, tree, stree, recon, events)

    # calc penalty of error
    tree.data["errorlogl"] = (tree.data.get("error", 0.0) *
                              conf.get("errorcost", 0.0))
    this.logl += tree.data["errorlogl"]

    # add logl of sequence evolution
    this.logl += tree.data.get("distlogl", 0.0)

    if baserate == integrate_rates:
        # record a concrete estimate instead of the sentinel
        baserate = Likelihood.getBaserate(tree, stree, params, recon=recon)

    tree.data["baserate"] = baserate
    tree.data["logl"] = this.logl

    if isDebug(DEBUG_MED):
        util.toc()
        debug("\n\n")
        drawTreeLogl(tree, events=events)

    return this.logl
def test_branch_prior_simple2(self): """Test branch prior 2""" tree = treelib.parse_newick("((a1:2, a2:3):.4, b1:2);") stree = treelib.parse_newick("(A:2, B:2);") gene2species = lambda x: x[0].upper() params = {"A": (1.0, 1.0), "B": (3.0, 3.0), 1: (1.0, 1.0), "baserate": (11.0, 10.0)} birth = 0.01 death = 0.02 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) # pd(mapdict(recon, key=lambda x: x.name, val=lambda x: x.name)) # pd(mapdict(events, key=lambda x: x.name)) p = spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False) tot = 0.0 gstart = 0.01 gend = 3.0 step = (gend - gstart) / 20.0 s2 = step / 2.0 gs = list(frange(gstart + s2, gend + s2, step)) for g in gs: pg = invgammaPdf(g, params["baserate"]) pa = 0.0 for i in range(nsamples): t = birthdeath.sample_birth_wait_time(1, stree.nodes["A"].dist, birth, death) # print t t2 = stree.nodes["A"].dist - t pa1 = gammaPdf(tree.nodes["a1"].dist, [params["A"][0], params["A"][1] / (g * t2)]) pa2 = gammaPdf(tree.nodes["a2"].dist, [params["A"][0], params["A"][1] / (g * t2)]) pb = spidir.gammaSumPdf( tree.nodes["b1"].dist + tree.nodes[2].dist, 2, [params["B"][0], params["A"][0]], [params["B"][1] / (g * stree.nodes["B"].dist), params["A"][1] / (g * t)], 0.001, ) if "nan" not in map(str, [pa1, pa2, pb]): pa += pa1 * pa2 * pb / nsamples tot += pg * pa * step # tot /= len(gs) print "unfold", ( tree.nodes["b1"].dist + tree.nodes[2].dist, [params["B"][0], params["A"][0]], [params["B"][1] / (g * stree.nodes["B"].dist), params["A"][1] / (g * t)], ) print "C", p print "P", log(tot)
def draw_tree(tree, labels=None, xscale=100, yscale=20, canvas=None,
              leafPadding=10, labelOffset=None, fontSize=10, labelSize=None,
              minlen=1, maxlen=util.INF, filename=sys.stdout,
              rmargin=150, lmargin=10, tmargin=0, bmargin=None,
              colormap=None, stree=None, layout=None, gene2species=None,
              lossColor=(0, 0, 1), dupColor=(1, 0, 0), eventSize=4,
              legendScale=False, autoclose=None):
    """Render a tree as SVG.

    Args:
        tree: tree to draw.
        labels: optional mapping from node name to branch label text
            (may contain newlines); defaults to no labels.
        xscale, yscale: layout scale factors passed to
            ``treelib.layout_tree``.
        canvas: existing SVG canvas to draw on; when None a new one is
            created on ``filename`` and sized from the layout and margins.
        leafPadding: horizontal gap between a leaf and its name.
        labelOffset: vertical label offset (default -1).
        fontSize: leaf-name font size.
        labelSize: branch-label font size (default ``.7 * fontSize``).
        minlen, maxlen: branch length bounds passed to the layout.
        filename: output stream or path used when ``canvas`` is None.
        rmargin, lmargin, tmargin, bmargin: margins (``bmargin`` defaults to
            ``yscale``).
        colormap: callable applied to ``tree`` to colour nodes; when None
            every node is coloured black.
        stree, gene2species: when both are given the tree is reconciled and
            duplication/loss events are drawn.
        layout: precomputed node-to-(x, y) mapping; computed when None.
        lossColor, dupColor, eventSize: event marker styling.
        legendScale: False for no legend, True to choose a power-of-ten
            length automatically, or a number giving the legend length.
        autoclose: whether to end the SVG document; defaults to True for a
            canvas created here and False for a caller-supplied canvas.

    Returns:
        The canvas that was drawn on.
    """
    # set defaults
    fontRatio = 8. / 11.

    if labels is None:
        labels = {}
    if labelSize is None:
        labelSize = .7 * fontSize
    if labelOffset is None:
        labelOffset = -1
    if bmargin is None:
        bmargin = yscale

    # a tree without branch lengths gets uniform branches and no scale bar
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap is None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(*coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)

        if autoclose is None:
            autoclose = True
    else:
        if autoclose is None:
            autoclose = False

    # draw tree
    def walk(node):
        x, y = coords[node]
        if node.parent:
            parentx = coords[node.parent][0]
        else:
            parentx = 0

        # draw branch
        canvas.line(parentx, y, x, y, color=node.color)
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # never let the label's estimated width exceed the branch
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen - 1), 0))
            for i, line in enumerate(lines):
                canvas.text(
                    line,
                    parentx + (branchlen - labellen) / 2.,
                    y + labelOffset
                    + (-len(lines) + 1 + i) * (labelSize + 1),
                    labelSize)

        if node.isLeaf():
            canvas.text(str(node.name),
                        x + leafPadding, y + fontSize / 2., fontSize,
                        fillColor=node.color)
        else:
            top = coords[node.children[0]][1]
            bot = coords[node.children[-1]][1]

            # draw children
            canvas.line(x, top, x, bot, color=node.color)

            for child in node.children:
                walk(child)

    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if stree and gene2species:
        draw_events(canvas, tree, coords, events, losses,
                    lossColor=lossColor, dupColor=dupColor, size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10 ** order
        else:
            # an explicit number is used as the legend length; previously
            # `length` was left unbound on this path
            length = legendScale

        drawScale(lmargin, tmargin + maxheight + bmargin - fontSize,
                  length, xscale, fontSize, canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas
def _test_branch_prior_approx(self): """Test branch prior""" prep_dir("test/output/branch_prior") out = open("test/output/branch_prior/flies.approx.txt", "w") out = sys.stderr treeids = os.listdir("test/data/flies") for treeid in treeids: tree = read_tree("test/data/flies-duploss/%s/%s.nt.tree" % (treeid, treeid)) print treeid draw_tree(tree) stree = read_tree("test/data/flies.stree") gene2species = phylo.read_gene2species("test/data/flies.smap") params = spidir.read_params("test/data/flies.param") birth = .0012 death = .0013 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) p = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False) for i in xrange(30) ] p2 = [ spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=True) for i in xrange(30) ] row = [ treeid, mean(p), exc_default(lambda: sdev(p), INF), mean(p2), exc_default(lambda: sdev(p2), INF) ] print >> out, "\t".join(map(str, row)) self.assert_(INF not in row and -INF not in row) out.close()
def test_branch_prior_simple2(self): """Test branch prior 2""" tree = treelib.parse_newick("((a1:2, a2:3):.4, b1:2);") stree = treelib.parse_newick("(A:2, B:2);") gene2species = lambda x: x[0].upper() params = { "A": (1.0, 1.0), "B": (3.0, 3.0), 1: (1.0, 1.0), "baserate": (11.0, 10.0) } birth = .01 death = .02 pretime = 1.0 nsamples = 100 recon = phylo.reconcile(tree, stree, gene2species) events = phylo.label_events(tree, recon) #pd(mapdict(recon, key=lambda x: x.name, val=lambda x: x.name)) #pd(mapdict(events, key=lambda x: x.name)) p = spidir.branch_prior(tree, stree, recon, events, params, birth, death, nsamples=nsamples, approx=False) tot = 0.0 gstart = 0.01 gend = 3.0 step = (gend - gstart) / 20.0 s2 = step / 2.0 gs = list(frange(gstart + s2, gend + s2, step)) for g in gs: pg = invgammaPdf(g, params["baserate"]) pa = 0.0 for i in range(nsamples): t = birthdeath.sample_birth_wait_time(1, stree.nodes["A"].dist, birth, death) #print t t2 = stree.nodes["A"].dist - t pa1 = gammaPdf(tree.nodes["a1"].dist, [params["A"][0], params["A"][1] / (g * t2)]) pa2 = gammaPdf(tree.nodes["a2"].dist, [params["A"][0], params["A"][1] / (g * t2)]) pb = spidir.gammaSumPdf( tree.nodes["b1"].dist + tree.nodes[2].dist, 2, [params["B"][0], params["A"][0]], [ params["B"][1] / (g * stree.nodes["B"].dist), params["A"][1] / (g * t) ], .001) if "nan" not in map(str, [pa1, pa2, pb]): pa += pa1 * pa2 * pb / nsamples tot += pg * pa * step #tot /= len(gs) print "unfold", (tree.nodes["b1"].dist + tree.nodes[2].dist, [params["B"][0], params["A"][0]], [ params["B"][1] / (g * stree.nodes["B"].dist), params["A"][1] / (g * t) ]) print "C", p print "P", log(tot)