def buildTree(conf, stree, gene2species):
    """Build one gene tree per distance file listed in ``conf["dist"]``.

    Each distance matrix is read with ``phylip.read_dist_matrix``, passed to
    ``Spidir.spidir`` and the returned tree is written to
    ``Spidir.outTreeFile(conf)``.  If ``conf`` holds a ``"correcttree"``,
    both trees are drawn and a CORRECT/WRONG verdict is printed.

    Parameters:
        conf: configuration mapping.  Keys read here: ``"param"`` (always),
            ``"dist"``, ``"labels"`` and ``"correcttree"`` (only if
            present).  Keys written here: ``"correcthash"`` and ``"aln"``.
        stree: species tree, forwarded untouched to ``Spidir.spidir``.
        gene2species: gene-to-species mapping, forwarded untouched to
            ``Spidir.spidir``.

    Returns:
        None.  Without a ``"dist"`` entry no tree is built.
    """
    model_params = Spidir.readParams(conf["param"])

    if "correcttree" in conf:
        conf["correcthash"] = phylo.hash_tree(conf["correcttree"])

    if "dist" not in conf:
        return

    for idx, dist_path in enumerate(conf["dist"]):
        names, matrix = phylip.read_dist_matrix(dist_path)

        # An optional per-matrix labels file overrides the names stored in
        # the distance file; the same path is also loaded as conf["aln"].
        if "labels" in conf:
            label_path = conf["labels"][idx]
            names = Spidir.readLabels(label_path)
            conf["aln"] = fasta.read_fasta(label_path)

        # The second element of the result is not used in this function.
        gene_tree, _logl = Spidir.spidir(conf, matrix, names, stree,
                                         gene2species, model_params)
        gene_tree.write(Spidir.outTreeFile(conf))

        # Everything below is the optional comparison with a known answer.
        if "correcttree" not in conf:
            continue

        reference = conf["correcttree"]

        # Return values are discarded, so these presumably reorder the trees
        # in place before hashing -- confirm against phylo.hash_order_tree.
        phylo.hash_order_tree(reference)
        phylo.hash_order_tree(gene_tree)
        found_hash = phylo.hash_tree(gene_tree)
        reference_hash = phylo.hash_tree(reference)

        # A single parenthesised argument prints identically under the
        # Python 2 print statement; print("") stands in for a bare print.
        print("spidir: ")
        treelib.draw_tree(gene_tree, maxlen=5, minlen=5)
        print("")

        print("correct:")
        treelib.draw_tree(reference, maxlen=5, minlen=5)
        print("")

        # The Robinson-Foulds error is only computed for more than 3 leaves.
        rf_error = (Spidir.robinson_foulds_error(reference, gene_tree)
                    if len(gene_tree.leaves()) > 3 else 0.0)

        if found_hash != reference_hash:
            print("WRONG TREE FOUND (RF: %f)" % rf_error)
        else:
            print("CORRECT TREE FOUND")
#!/usr/bin/env python from rasmus.common import * from rasmus.bio import phylo import Spidir import Spidir.Likelihood tree = readTree("../test/0.nt.tree") stree = readTree("../test/flies.stree") gene2species = genomeutil.readGene2species("../test/flies.smap") params = Spidir.readParams("../test/flies.nt.param") drawTree(tree) print sum(x.dist for x in tree) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.Likelihood.getBaserate(tree, stree, params, gene2species=gene2species) conf = {"python_only": True, "famprob": True} print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params) conf = {} #generate = Spidir.estGeneRate(tree, stree, params, gene2species) for generate in frange(1.5, 2.3, .05): print generate, Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params, baserate=generate)
for node in nodes: if node.isLeaf(): gene2speciesarray.append(snodelookup[ stree.nodes[gene2species(node.name)]]) else: gene2speciesarray.append(-1) return gene2speciesarray tree = treelib.readTree("../data/0.nt.tree") tree = treelib.parseNewick("((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));") tree = treelib.parseNewick("((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));") stree = treelib.readTree("../data/flies.stree") gene2species = genomeutil.readGene2species("../data/flies.smap") params = Spidir.readParams("../data/flies.nt.param") ptree, nodes, nodelookup = Spidir.makePtree(tree) pstree, snodes, snodelookup = Spidir.makePtree(stree) g2s = gene2speciesArray(tree, stree, gene2species) mu = [float(params[snode.name][0]) for snode in snodes] sigma = [float(params[snode.name][1]) for snode in snodes] alpha = float(params['baserate'][0]) beta = float(params['baserate'][1]) dists = pyspidir.genbranches(ptree, pstree, g2s, mu, sigma, alpha, beta) for node, d in zip(nodes, dists): node.dist = d
#!/usr/bin/env python
#
# Scratch script for the flies data set under test/data.  It prints the
# results of Spidir.estGeneRate and Spidir.Likelihood.getBaserate for one
# gene tree, then calls spidir.sample_gene_rate on the same inputs.
#
# Every print below takes exactly one parenthesised argument, which the
# Python 2 print statement renders identically.

from rasmus.common import *
from rasmus.bio import phylo
import Spidir
import spidir


# --- inputs ---------------------------------------------------------------
# readTree, readFasta, genomeutil, drawTree and util are not defined in this
# script; presumably they come from the star import above -- confirm.
tree = readTree("test/data/0.nt.tree")
stree = readTree("test/data/flies.stree")
gene2species = genomeutil.readGene2species("test/data/flies.smap")
params = Spidir.readParams("test/data/flies.nt.param")
aln = readFasta("test/data/1.nt.align")

# Fixed arguments for spidir.sample_gene_rate.  Going by the names these are
# presumably background base frequencies and a transition/transversion
# ratio -- confirm against spidir.sample_gene_rate.
bgfreqs = [.258, .267, .266, .209]
tsvratio = 1.59


# --- gene tree overview ---------------------------------------------------
drawTree(tree)

total_dist = sum(x.dist for x in tree)
print(total_dist)


# --- rate estimates -------------------------------------------------------
# Printed twice in a row (presumably to eyeball run-to-run variation --
# confirm).
for _run in range(2):
    print(Spidir.estGeneRate(tree, stree, params, gene2species))

# NOTE(review): Spidir.Likelihood is used here without an explicit
# "import Spidir.Likelihood"; this relies on the Spidir package exposing
# it -- confirm.
print(Spidir.Likelihood.getBaserate(tree, stree, params,
                                    gene2species=gene2species))


# --- disabled: likelihood with explicit options ---------------------------
#conf = {"python_only": True,
#        "famprob": True}
#print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params)


# --- sampling -------------------------------------------------------------
util.tic("sample")

# The trailing 20000 is presumably the number of samples -- confirm.
generates = spidir.sample_gene_rate(
    tree, stree, gene2species, params,
    aln, bgfreqs, tsvratio,
    20000)
#!/usr/bin/env python from rasmus.common import * from rasmus.bio import phylo import Spidir import Spidir.Likelihood tree = readTree("../test/0.nt.tree") stree = readTree("../test/flies.stree") gene2species = genomeutil.readGene2species("../test/flies.smap") params = Spidir.readParams("../test/flies.nt.param") drawTree(tree) print sum(x.dist for x in tree) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.estGeneRate(tree, stree, params, gene2species) print Spidir.Likelihood.getBaserate(tree, stree, params, gene2species=gene2species) conf = {"python_only": True, "famprob": True} print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params) conf = {} #generate = Spidir.estGeneRate(tree, stree, params, gene2species) for generate in frange(1.5, 2.3, .05): print generate, Spidir.treeLogLikelihood(conf,
else: gene2speciesarray.append(-1) return gene2speciesarray tree = treelib.readTree("../data/0.nt.tree") tree = treelib.parseNewick( "((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));" ) tree = treelib.parseNewick( "((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));" ) stree = treelib.readTree("../data/flies.stree") gene2species = genomeutil.readGene2species("../data/flies.smap") params = Spidir.readParams("../data/flies.nt.param") ptree, nodes, nodelookup = Spidir.makePtree(tree) pstree, snodes, snodelookup = Spidir.makePtree(stree) g2s = gene2speciesArray(tree, stree, gene2species) mu = [float(params[snode.name][0]) for snode in snodes] sigma = [float(params[snode.name][1]) for snode in snodes] alpha = float(params['baserate'][0]) beta = float(params['baserate'][1]) dists = pyspidir.genbranches(ptree, pstree, g2s, mu, sigma, alpha, beta) for node, d in zip(nodes, dists): node.dist = d drawTree(tree)
#!/usr/bin/env python
#
# Scratch script for the flies data set under test/data.  It prints the
# results of Spidir.estGeneRate and Spidir.Likelihood.getBaserate for one
# gene tree and then calls util.tic("sample").
#
# Every print below takes exactly one parenthesised argument, which the
# Python 2 print statement renders identically.

from rasmus.common import *
from rasmus.bio import phylo
import Spidir
import spidir


# --- inputs ---------------------------------------------------------------
# readTree, readFasta, genomeutil, drawTree and util are not defined in this
# script; presumably they come from the star import above -- confirm.
tree = readTree("test/data/0.nt.tree")
stree = readTree("test/data/flies.stree")
gene2species = genomeutil.readGene2species("test/data/flies.smap")
params = Spidir.readParams("test/data/flies.nt.param")
aln = readFasta("test/data/1.nt.align")

# Going by the names these are presumably background base frequencies and a
# transition/transversion ratio; nothing visible here consumes them --
# confirm against the code that follows.
bgfreqs = [.258, .267, .266, .209]
tsvratio = 1.59


# --- gene tree overview ---------------------------------------------------
drawTree(tree)

total_dist = sum(x.dist for x in tree)
print(total_dist)


# --- rate estimates -------------------------------------------------------
# Printed twice in a row (presumably to eyeball run-to-run variation --
# confirm).
for _run in range(2):
    print(Spidir.estGeneRate(tree, stree, params, gene2species))

# NOTE(review): Spidir.Likelihood is used here without an explicit
# "import Spidir.Likelihood"; this relies on the Spidir package exposing
# it -- confirm.
print(Spidir.Likelihood.getBaserate(tree, stree, params,
                                    gene2species=gene2species))


# --- disabled: likelihood with explicit options ---------------------------
#conf = {"python_only": True,
#        "famprob": True}
#print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params)


# --- sampling -------------------------------------------------------------
util.tic("sample")