Ejemplo n.º 1
0
    def _test_ml_speed(self):
        """Time ten runs of HKY maximum-likelihood branch-length fitting.

        Reads the tree and alignment under test/data/flies.nt/0/ and calls
        spidir.find_ml_branch_lengths_hky ten times (maxiter=10 each)
        between util.tic/util.toc.  Nothing is asserted or returned.

        The method name does not start with ``test``, so a default unittest
        loader will not collect it; call it explicitly to run it.
        """

        # params
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # data
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        util.tic("find ML")
        # Only the elapsed time matters here, so the fitted value and the
        # per-node branch lengths are not collected.
        for _ in range(10):
            spidir.find_ml_branch_lengths_hky(
                tree,
                util.mget(align, tree.leafNames()),
                bgfreq, kappa,
                maxiter=10)
        util.toc()
Ejemplo n.º 2
0
    def test_ml_large(self):
        """Test ML code on the test/data/verts/19520 data set.

        Checks that both the sequence likelihood of the tree as read from
        disk and the value returned by a single ML branch-length iteration
        differ from ``-util.INF``.
        """

        # params
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # data
        tree = treelib.readTree("test/data/verts/19520/19520.ensembl.tree")
        align = fasta.readFasta("test/data/verts/19520/19520.nt.mfa")

        # likelihood of the input tree
        # (a parenthesised single-argument print behaves the same as the
        # Python 2 print statement and also parses under Python 3)
        like = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa)
        print(like)
        self.assertNotEqual(like, -util.INF)

        # one iteration of branch-length optimisation, without parsimony
        # initialisation (parsinit=False)
        like = spidir.find_ml_branch_lengths_hky(
            tree,
            util.mget(align, tree.leafNames()),
            bgfreq, kappa,
            parsinit=False,
            maxiter=1)
        print(like)
        self.assertNotEqual(like, -util.INF)
Ejemplo n.º 3
0
    def test_ml_large(self):
        """Test ML code on the test/data/verts/19520 data set.

        Asserts that neither the sequence likelihood of the input tree nor
        the result of one ML branch-length iteration equals ``-util.INF``.
        """

        # params
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # data
        tree = treelib.readTree("test/data/verts/19520/19520.ensembl.tree")
        align = fasta.readFasta("test/data/verts/19520/19520.nt.mfa")

        # likelihood of the tree exactly as read from disk
        # (single-argument print(...) is valid in both Python 2 and 3)
        like = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa)
        print(like)
        self.assertNotEqual(like, -util.INF)

        # a single optimisation iteration with parsinit disabled
        leaf_seqs = util.mget(align, tree.leafNames())
        like = spidir.find_ml_branch_lengths_hky(tree,
                                                 leaf_seqs,
                                                 bgfreq,
                                                 kappa,
                                                 parsinit=False,
                                                 maxiter=1)
        print(like)
        self.assertNotEqual(like, -util.INF)
Ejemplo n.º 4
0
    def _test_ml_speed(self):
        """Benchmark HKY maximum-likelihood branch-length fitting.

        Loads test/data/flies.nt/0/0.tree and 0.align, then runs
        spidir.find_ml_branch_lengths_hky ten times with maxiter=10 inside
        a util.tic/util.toc pair.  Makes no assertions and returns nothing.

        Because the name lacks the ``test`` prefix, default unittest
        loaders skip it; invoke it by hand when a timing is wanted.
        """

        # params
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # data
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        util.tic("find ML")
        # The return value is discarded: this method measures time only.
        for _ in range(10):
            spidir.find_ml_branch_lengths_hky(tree,
                                              util.mget(
                                                  align, tree.leafNames()),
                                              bgfreq,
                                              kappa,
                                              maxiter=10)
        util.toc()
Ejemplo n.º 5
0
    def _test_ml(self):
        """Trace convergence of the ML branch-length fit and plot it.

        Calls spidir.find_ml_branch_lengths_hky forty times with maxiter=1
        on the same tree, recording the node branch lengths and the
        returned value after every call.  The traces are printed and
        written as two PDFs under test/output/ml/.
        """

        # model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # input data
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # Fix one node ordering (by branch length at load time) so that
        # column k of `dists` refers to the same node in every row.
        nodes = sorted(tree.nodes.values(), key=lambda node: node.dist)

        dists = []
        likes = []

        util.tic("find ML")
        for _ in range(40):
            leaf_seqs = util.mget(align, tree.leafNames())
            loglike = spidir.find_ml_branch_lengths_hky(
                tree, leaf_seqs, bgfreq, kappa,
                parsinit=False, maxiter=1)

            dists.append([node.dist for node in nodes])
            likes.append(loglike)
        util.toc()

        print(likes)

        prep_dir("test/output/ml/")

        # branch lengths: column 0 opens the plot, then every column
        # (column 0 included) is drawn as a line
        util.rplot_start("test/output/ml/ml_branches.pdf")
        util.rplot(
            "plot", util.cget(dists, 0),
            ylim=[0, max(dists[0])],
            t="l",
            main="branch length convergence",
            xlab="iterations",
            ylab="branch lengths (sub/site)")
        for trace in zip(*dists):
            util.rplot("lines", trace)
        util.rplot_end(True)

        print(util.cget(dists, 4))

        # returned likelihood value per iteration
        util.rplot_start("test/output/ml/ml_likelihood.pdf")
        util.rplot(
            "plot", likes,
            t="l",
            xlab="iterations",
            ylab="log likelihood",
            main="likelihood convergence")
        util.rplot_end(True)
Ejemplo n.º 6
0
    def _test_ml(self):
        """Record and plot how the ML fit evolves over forty iterations.

        Each pass makes one maxiter=1 call to
        spidir.find_ml_branch_lengths_hky on the same tree, then stores the
        current branch length of every node and the value the call
        returned.  Results are printed and plotted to test/output/ml/.
        """

        # substitution-model parameters
        bgfreq = [.258, .267, .266, .209]
        kappa = 1.59

        # tree and alignment
        tree = treelib.readTree("test/data/flies.nt/0/0.tree")
        align = fasta.readFasta("test/data/flies.nt/0/0.align")

        # Nodes are ordered once, by their branch length at load time; the
        # same order is reused for every recorded row.
        ordered_nodes = sorted(tree.nodes.values(),
                               key=lambda node: node.dist)

        likes = []
        dists = []

        util.tic("find ML")
        for _ in range(40):
            step_like = spidir.find_ml_branch_lengths_hky(
                tree,
                util.mget(align, tree.leafNames()),
                bgfreq,
                kappa,
                parsinit=False,
                maxiter=1)

            dists.append([node.dist for node in ordered_nodes])
            likes.append(step_like)
        util.toc()

        print(likes)

        prep_dir("test/output/ml/")

        # distances plot: one line per node across iterations
        util.rplot_start("test/output/ml/ml_branches.pdf")
        first_column = util.cget(dists, 0)
        util.rplot("plot",
                   first_column,
                   ylim=[0, max(dists[0])],
                   t="l",
                   main="branch length convergence",
                   xlab="iterations",
                   ylab="branch lengths (sub/site)")
        for column in zip(*dists):
            util.rplot("lines", column)
        util.rplot_end(True)

        print(util.cget(dists, 4))

        # likelihood plot
        util.rplot_start("test/output/ml/ml_likelihood.pdf")
        util.rplot("plot",
                   likes,
                   t="l",
                   xlab="iterations",
                   ylab="log likelihood",
                   main="likelihood convergence")
        util.rplot_end(True)
Ejemplo n.º 7
0
def readOptions(conf):
    """Resolve the common data options in ``conf``, modifying it in place.

    * ``conf["gene2species"]`` is set to the result of
      ``readGene2species(*conf["smap"])`` when an ``"smap"`` entry exists,
      and to the module-level ``gene2species`` otherwise.
    * ``conf["stree"]``, when present, is replaced by the result of
      ``treelib.readTree`` applied to its current value.

    Returns None.
    """

    # species map: explicit "smap" arguments win over the default mapping
    conf["gene2species"] = (readGene2species(*conf["smap"])
                            if "smap" in conf
                            else gene2species)

    # species tree: swap the stored value for the tree read from it
    if "stree" in conf:
        stree_source = conf["stree"]
        conf["stree"] = treelib.readTree(stree_source)
Ejemplo n.º 8
0
def gene2speciesArray(tree, stree, gene2species):
    """Build the gene-node to species-node lookup list for ``tree``.

    The result has one entry per node of ``tree``, in the node order
    returned by ``Spidir.makePtree(tree)``.  For a leaf the entry is
    ``snodelookup[stree.nodes[gene2species(leaf.name)]]``, where
    ``snodelookup`` comes from ``Spidir.makePtree(stree)``; every
    non-leaf node gets -1.
    """
    # Only the node list of the gene tree and the lookup table of the
    # species tree are needed from the two makePtree triples.
    _, nodes, _ = Spidir.makePtree(tree)
    _, _, snodelookup = Spidir.makePtree(stree)

    return [
        snodelookup[stree.nodes[gene2species(node.name)]]
        if node.isLeaf() else -1
        for node in nodes
    ]
    

# Gene tree.  `tree` is rebound three times in a row, so only the last
# parseNewick result is in effect for the code below.  The two Newick
# strings differ only in that dmoj_sim and dwil_sim trade places.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick("((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")
tree = treelib.parseNewick("((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));")

# Species tree, gene-to-species mapping and model parameters, each read
# from a file under ../data.
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")


# Unpack the triple returned by Spidir.makePtree for each tree.
ptree, nodes, nodelookup = Spidir.makePtree(tree)
pstree, snodes, snodelookup = Spidir.makePtree(stree)
# One entry per gene-tree node (see gene2speciesArray above).
g2s = gene2speciesArray(tree, stree, gene2species)
# Entries 0 and 1 of each species node's params record, in `snodes` order.
# NOTE(review): presumably a per-branch mean and standard deviation --
# confirm against the param file format.
mu = [float(params[snode.name][0]) for snode in snodes]
sigma = [float(params[snode.name][1]) for snode in snodes]
# Entries 0 and 1 of the 'baserate' params record.
alpha = float(params['baserate'][0])
beta = float(params['baserate'][1])
Ejemplo n.º 9
0
def gene2speciesArray(tree, stree, gene2species):
    """Return, for every node of ``tree``, its species-tree lookup value.

    Nodes are visited in the order given by ``Spidir.makePtree(tree)``.
    A leaf contributes ``snodelookup[stree.nodes[gene2species(name)]]``
    (``snodelookup`` being the third item of ``Spidir.makePtree(stree)``);
    any other node contributes -1.
    """
    _, gene_nodes, _ = Spidir.makePtree(tree)
    _, _, snodelookup = Spidir.makePtree(stree)

    def species_entry(node):
        # non-leaf nodes have no species assignment in this array
        if not node.isLeaf():
            return -1
        species_node = stree.nodes[gene2species(node.name)]
        return snodelookup[species_node]

    return [species_entry(node) for node in gene_nodes]


# Gene tree.  `tree` is rebound three times in a row, so only the last
# parseNewick result is in effect for the code below.  The two Newick
# strings differ only in that dmoj_sim and dwil_sim trade places.
tree = treelib.readTree("../data/0.nt.tree")
tree = treelib.parseNewick(
    "((dmoj_sim,(dvir_sim,dgri_sim)),(dwil_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));"
)
tree = treelib.parseNewick(
    "((dwil_sim,(dvir_sim,dgri_sim)),(dmoj_sim,(dpse_sim,(dana_sim,(dmel_sim,(dyak_sim,dere_sim))))));"
)

# Species tree, gene-to-species mapping and model parameters, each read
# from a file under ../data.
stree = treelib.readTree("../data/flies.stree")
gene2species = genomeutil.readGene2species("../data/flies.smap")
params = Spidir.readParams("../data/flies.nt.param")

# Unpack the triple returned by Spidir.makePtree for each tree.
ptree, nodes, nodelookup = Spidir.makePtree(tree)
pstree, snodes, snodelookup = Spidir.makePtree(stree)
# One entry per gene-tree node (see gene2speciesArray above).
g2s = gene2speciesArray(tree, stree, gene2species)
# Entry 0 of each species node's params record, in `snodes` order.
# NOTE(review): presumably a per-branch mean -- confirm against the param
# file format.
mu = [float(params[snode.name][0]) for snode in snodes]