Esempio n. 1
0
def _reportCorrectness(tree, correctTree):
    """Print a comparison of an inferred tree against the expected tree.

    Both trees are passed to ``phylo.hash_order_tree`` (return value unused;
    presumably it reorders the tree in place -- confirm), drawn with
    ``treelib.draw_tree``, and compared through their ``phylo.hash_tree``
    values.  On a mismatch the Robinson-Foulds error is printed as well; it
    is reported as 0.0 when the inferred tree has three leaves or fewer.
    """
    phylo.hash_order_tree(correctTree)
    phylo.hash_order_tree(tree)

    thash1 = phylo.hash_tree(tree)
    thash2 = phylo.hash_tree(correctTree)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("spidir: ")
    treelib.draw_tree(tree, maxlen=5, minlen=5)
    print("")

    print("correct:")
    treelib.draw_tree(correctTree, maxlen=5, minlen=5)
    print("")

    if thash1 == thash2:
        print("CORRECT TREE FOUND")
        return

    # The RF error is only ever shown for a wrong tree, so it is computed
    # only on this path.
    if len(tree.leaves()) > 3:
        rferror = Spidir.robinson_foulds_error(correctTree, tree)
    else:
        rferror = 0.0
    print("WRONG TREE FOUND (RF: %f)" % rferror)


def buildTree(conf, stree, gene2species):
    """Build one gene tree per distance-matrix file listed in ``conf``.

    Keys read from ``conf``:
      * ``"param"``       -- path handed to ``Spidir.readParams``.
      * ``"dist"``        -- optional sequence of distance-matrix files; one
                             tree is built and written for each entry.
      * ``"labels"``      -- optional sequence indexed in step with
                             ``"dist"``; when present its entry replaces the
                             labels read from the distance matrix.
      * ``"correcttree"`` -- optional expected tree; when present every
                             inferred tree is compared against it and the
                             outcome is printed.

    Keys written to ``conf``:
      * ``"correcthash"`` -- ``phylo.hash_tree`` of ``conf["correcttree"]``.
      * ``"aln"``         -- result of ``fasta.read_fasta`` (see note below).

    ``stree`` and ``gene2species`` are forwarded unchanged to
    ``Spidir.spidir``.  Nothing is returned; each tree is written to
    ``Spidir.outTreeFile(conf)``.
    """
    params = Spidir.readParams(conf["param"])

    if "correcttree" in conf:
        conf["correcthash"] = phylo.hash_tree(conf["correcttree"])

    if "dist" in conf:
        for i, distfile in enumerate(conf["dist"]):
            labels, distmat = phylip.read_dist_matrix(distfile)

            # read in different labels if needed
            if "labels" in conf:
                labels = Spidir.readLabels(conf["labels"][i])
                # NOTE(review): the alignment is read from the *labels* path;
                # confirm this file really is FASTA and not a copy/paste slip.
                conf["aln"] = fasta.read_fasta(conf["labels"][i])

            # The second element of the result is not used here.
            tree, _logl = Spidir.spidir(conf, distmat, labels, stree,
                                        gene2species, params)
            tree.write(Spidir.outTreeFile(conf))

            # test for correctness
            if "correcttree" in conf:
                _reportCorrectness(tree, conf["correcttree"])
Esempio n. 2
0
#!/usr/bin/env python

from rasmus.common import *
from rasmus.bio import phylo
import Spidir
import Spidir.Likelihood

# Inputs: gene tree, species tree, gene-to-species mapping, model parameters.
tree = readTree("../test/0.nt.tree")
stree = readTree("../test/flies.stree")
gene2species = genomeutil.readGene2species("../test/flies.smap")
params = Spidir.readParams("../test/flies.nt.param")

drawTree(tree)

# Sum of the .dist attribute over everything yielded by iterating the tree.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(sum(x.dist for x in tree))

# The estimate is printed three times on identical inputs.
# NOTE(review): presumably to eyeball run-to-run variation -- confirm.
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.Likelihood.getBaserate(tree, stree, params,
                                    gene2species=gene2species))

conf = {"python_only": True,
        "famprob": True}
print(Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params))

conf = {}
#generate = Spidir.estGeneRate(tree, stree, params, gene2species)

# Scan candidate base rates and print "<rate> <log-likelihood>" per line.
# "%s %s" reproduces the str()-and-space output of the old
# `print a, b` statement.
for generate in frange(1.5, 2.3, .05):
    print("%s %s" % (generate,
                     Spidir.treeLogLikelihood(conf, tree, stree,
                                              gene2species, params,
                                              baserate=generate)))
Esempio n. 3
0
    for node in nodes:
        if node.isLeaf():
            gene2speciesarray.append(snodelookup[
                                     stree.nodes[gene2species(node.name)]])
        else:
            gene2speciesarray.append(-1)
    return gene2speciesarray
    

# Gene tree.  The file is read first, but `tree` is then rebound twice to
# hard-coded Newick topologies; only the last assignment is in effect below.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick(
    "((dmoj_sim,(dvir_sim,dgri_sim)),"
    "(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")
tree = treelib.parseNewick(
    "((dwil_sim,(dvir_sim,dgri_sim)),"
    "(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")

# Species tree, gene-to-species mapping and model parameters.
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")

# Array forms of both trees plus the gene-node -> species-node index list.
(ptree, nodes, nodelookup) = Spidir.makePtree(tree)
(pstree, snodes, snodelookup) = Spidir.makePtree(stree)
g2s = gene2speciesArray(tree, stree, gene2species)

# Per-species-node parameters: entry 0 feeds `mu`, entry 1 feeds `sigma`.
mu = [float(params[snode.name][0]) for snode in snodes]
sigma = [float(params[snode.name][1]) for snode in snodes]

# The two 'baserate' entries, in order.
alpha, beta = float(params['baserate'][0]), float(params['baserate'][1])

# Generate one branch length per node and store it on the gene tree.
dists = pyspidir.genbranches(ptree, pstree, g2s, mu, sigma, alpha, beta)
for node, d in zip(nodes, dists):
    node.dist = d
#!/usr/bin/env python

from rasmus.common import *
from rasmus.bio import phylo
import Spidir

import spidir

# Inputs: gene tree, species tree, gene-to-species mapping, model
# parameters and a FASTA alignment.
tree = readTree("test/data/0.nt.tree")
stree = readTree("test/data/flies.stree")
gene2species = genomeutil.readGene2species("test/data/flies.smap")
params = Spidir.readParams("test/data/flies.nt.param")
aln = readFasta("test/data/1.nt.align")

# Four background frequencies and a ratio, both passed to
# spidir.sample_gene_rate below.
# NOTE(review): presumably nucleotide frequencies and the
# transition/transversion ratio -- confirm against the sampler's API.
bgfreqs = [.258,.267,.266,.209]
tsvratio = 1.59

drawTree(tree)

# Sum of the .dist attribute over everything yielded by iterating the tree.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(sum(x.dist for x in tree))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
# NOTE(review): only `import Spidir` is visible above; this relies on the
# package exposing `Spidir.Likelihood` itself -- confirm.
print(Spidir.Likelihood.getBaserate(tree, stree, params,
                                    gene2species=gene2species))

#conf = {"python_only": True, 
#        "famprob": True}
#print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params)

util.tic("sample")
# The trailing 20000 is the last positional argument.
# NOTE(review): presumably the number of samples -- confirm.
generates = spidir.sample_gene_rate(tree, stree, gene2species, params,
                                    aln, bgfreqs, tsvratio, 20000)
Esempio n. 5
0
#!/usr/bin/env python

from rasmus.common import *
from rasmus.bio import phylo
import Spidir
import Spidir.Likelihood

# Inputs: gene tree, species tree, gene-to-species mapping, model parameters.
tree = readTree("../test/0.nt.tree")
stree = readTree("../test/flies.stree")
gene2species = genomeutil.readGene2species("../test/flies.smap")
params = Spidir.readParams("../test/flies.nt.param")

drawTree(tree)

# Sum of the .dist attribute over everything yielded by iterating the tree.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(sum(x.dist for x in tree))

# The estimate is printed three times on identical inputs.
# NOTE(review): presumably to eyeball run-to-run variation -- confirm.
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.Likelihood.getBaserate(tree,
                                    stree,
                                    params,
                                    gene2species=gene2species))

conf = {"python_only": True, "famprob": True}
print(Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params))

# Reset to an empty configuration for the statements that follow.
conf = {}
#generate = Spidir.estGeneRate(tree, stree, params, gene2species)

for generate in frange(1.5, 2.3, .05):
    print generate, Spidir.treeLogLikelihood(conf,
Esempio n. 6
0
        else:
            gene2speciesarray.append(-1)
    return gene2speciesarray


# Gene tree.  The file is read first, but `tree` is then rebound twice to
# hard-coded Newick topologies; only the last assignment is in effect below.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick(
    "((dmoj_sim,(dvir_sim,dgri_sim)),"
    "(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")
tree = treelib.parseNewick(
    "((dwil_sim,(dvir_sim,dgri_sim)),"
    "(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")

# Species tree, gene-to-species mapping and model parameters.
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")

# Array forms of both trees plus the gene-node -> species-node index list.
(ptree, nodes, nodelookup) = Spidir.makePtree(tree)
(pstree, snodes, snodelookup) = Spidir.makePtree(stree)
g2s = gene2speciesArray(tree, stree, gene2species)

# Per-species-node parameters: entry 0 feeds `mu`, entry 1 feeds `sigma`.
mu = [float(params[snode.name][0]) for snode in snodes]
sigma = [float(params[snode.name][1]) for snode in snodes]

# The two 'baserate' entries, in order.
alpha, beta = float(params['baserate'][0]), float(params['baserate'][1])

# Generate one branch length per node and store it on the gene tree.
dists = pyspidir.genbranches(ptree, pstree, g2s, mu, sigma, alpha, beta)
for node, d in zip(nodes, dists):
    node.dist = d

# Show the tree with the freshly assigned branch lengths.
drawTree(tree)
Esempio n. 7
0
#!/usr/bin/env python

from rasmus.common import *
from rasmus.bio import phylo
import Spidir

import spidir

# Inputs: gene tree, species tree, gene-to-species mapping, model
# parameters and a FASTA alignment.
tree = readTree("test/data/0.nt.tree")
stree = readTree("test/data/flies.stree")
gene2species = genomeutil.readGene2species("test/data/flies.smap")
params = Spidir.readParams("test/data/flies.nt.param")
aln = readFasta("test/data/1.nt.align")

# Four background frequencies and a ratio; neither is used within the
# visible part of this script.
# NOTE(review): presumably nucleotide frequencies and the
# transition/transversion ratio -- confirm.
bgfreqs = [.258, .267, .266, .209]
tsvratio = 1.59

drawTree(tree)

# Sum of the .dist attribute over everything yielded by iterating the tree.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(sum(x.dist for x in tree))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
print(Spidir.estGeneRate(tree, stree, params, gene2species))
# NOTE(review): only `import Spidir` is visible above; this relies on the
# package exposing `Spidir.Likelihood` itself -- confirm.
print(Spidir.Likelihood.getBaserate(tree,
                                    stree,
                                    params,
                                    gene2species=gene2species))

#conf = {"python_only": True,
#        "famprob": True}
#print Spidir.treeLogLikelihood(conf, tree, stree, gene2species, params)

util.tic("sample")