Example #1
0
        def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Return the sum over sites of safediv(ds, s, INF) for one branch.

            For site j,

                s = sum_k bgfreq[k] * (sum_x model1[k][x] * probs1[4*j + x])
                                    * (sum_y model2[k][y] * probs2[4*j + y])

            with model1 = make_hky_matrix(bgfreq, kappa, t) and
            model2 = make_hky_matrix(bgfreq, kappa, 0.0).  ds is the same
            expression with model1 replaced by
            make_hky_deriv_matrix(bgfreq, kappa, t).  Presumably that matrix
            is the derivative of model1 with respect to t, which would make
            ds / s the derivative of log(s) -- confirm against spidir.

            probs1 and probs2 are read as flat sequences holding 4 values per
            site (index 4 * j + state).
            """
            model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
            model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
            dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

            logl = 0.0
            for j in xrange(seqlen):
                base = 4 * j

                # The probs2 factor is the same in ds and s, so evaluate it
                # once per state k instead of once per sum.
                right = [sum(model2[k][y] * probs2[base + y] for y in xrange(4))
                         for k in xrange(4)]

                ds = sum(
                    bgfreq[k]
                    * sum(dmodel1[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4)
                )

                s = sum(
                    bgfreq[k]
                    * sum(model1[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4)
                )

                logl += safediv(ds, s, INF)
            return logl
Example #2
0
        def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Return the sum over sites of -(dg/g)^2 + d2g/g for one branch.

            For site j,

                g = sum_k bgfreq[k] * (sum_x model1[k][x] * probs1[4*j + x])
                                    * (sum_y model2[k][y] * probs2[4*j + y])

            with model1 = make_hky_matrix(bgfreq, kappa, t) and
            model2 = make_hky_matrix(bgfreq, kappa, 0.0).  dg and d2g are the
            same expression with model1 replaced by the matrices from
            make_hky_deriv_matrix and make_hky_deriv2_matrix at t.
            Presumably those are the first and second derivatives of model1
            with respect to t, which would make each term the second
            derivative of log(g) -- confirm against spidir.

            probs1 and probs2 are read as flat sequences holding 4 values per
            site (index 4 * j + state).  Both divisions go through safediv
            with INF as the third argument.
            """
            model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
            model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
            dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
            d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

            def site_sum(matrix, base, right):
                # bgfreq-weighted sum over k of the probs1 term (using
                # `matrix`) times the precomputed probs2 term right[k].
                return sum(
                    bgfreq[k]
                    * sum(matrix[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4))

            logl = 0.0
            for j in xrange(seqlen):
                base = 4 * j

                # The probs2 factor is shared by g, dg and d2g; evaluate it
                # once per state k.
                right = [sum(model2[k][y] * probs2[base + y] for y in xrange(4))
                         for k in xrange(4)]

                g = site_sum(model1, base, right)
                dg = site_sum(dmodel1, base, right)
                d2g = site_sum(d2model1, base, right)

                logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
            return logl
Example #3
0
        def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Return the sum over sites of safelog(s, e) for one branch.

            For site j, s is the bgfreq-weighted sum over states k of

                (sum_x near[k][x] * probs1[4*j + x])
                * (sum_y far[k][y] * probs2[4*j + y])

            where near = make_hky_matrix(bgfreq, kappa, t) and
            far = make_hky_matrix(bgfreq, kappa, 0).  probs1 and probs2 are
            read as flat sequences holding 4 values per site.
            """
            near = spidir.make_hky_matrix(bgfreq, kappa, t)
            far = spidir.make_hky_matrix(bgfreq, kappa, 0)

            total = 0.0
            for site in xrange(seqlen):
                offset = 4 * site
                site_lk = 0
                for k in xrange(4):
                    left = 0
                    for x in xrange(4):
                        left += near[k][x] * probs1[offset + x]
                    right = 0
                    for y in xrange(4):
                        right += far[k][y] * probs2[offset + y]
                    site_lk += bgfreq[k] * left * right
                total += safelog(site_lk, e)

            return total
Example #4
0
        def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Accumulate safelog(s, e) over all `seqlen` sites of one branch.

            s for a site is sum_k bgfreq[k] * A_k * B_k, where A_k combines
            row k of make_hky_matrix(bgfreq, kappa, t) with the site's four
            probs1 values and B_k combines row k of
            make_hky_matrix(bgfreq, kappa, 0) with the site's four probs2
            values.  Both probs arguments are indexed as 4 * site + state.
            """
            model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
            model2 = spidir.make_hky_matrix(bgfreq, kappa, 0)

            def weighted(row, probs, offset):
                # Dot product of one matrix row with one site's 4 entries.
                return sum(row[b] * probs[offset + b] for b in xrange(4))

            logl = 0.0
            for j in xrange(seqlen):
                offset = 4 * j
                s = sum(bgfreq[k]
                        * weighted(model1[k], probs1, offset)
                        * weighted(model2[k], probs2, offset)
                        for k in xrange(4))
                logl += safelog(s, e)

            return logl
Example #5
0
    def test_hky(self):
        """Pretty-print the HKY matrix for one fixed parameter set.

        Nothing is asserted; the matrix is only printed for inspection.
        """

        bgfreq, kappa, t = [.3, .2, .3, .2], 2.0, 0.2

        matrix = spidir.make_hky_matrix(bgfreq, kappa, t)
        pprint(matrix)
Example #6
0
    def test_hky(self):
        """Pretty-print the HKY matrix from spidir and then from phylo.

        Nothing is asserted; both matrices are printed so they can be
        compared by eye.  Note that the two functions take their arguments
        in a different order.
        """

        bgfreq, kappa, t = [.3, .2, .3, .2], 2.0, 0.2

        spidir_matrix = spidir.make_hky_matrix(bgfreq, kappa, t)
        pprint(spidir_matrix)

        phylo_matrix = phylo.make_hky_matrix(t, bgfreq, kappa)
        pprint(phylo_matrix)
Example #7
0
    def test_hky_deriv(self):
        """Plot one random HKY matrix entry against its derivative.

        Three curves over t in frange(0, 1.0, .01) are written to
        test/output/hky/hky_deriv.pdf: the matrix entry itself, the entry of
        make_hky_deriv_matrix (red), and a forward finite difference of the
        matrix entry with step .01 (blue).  Nothing is asserted.
        """

        bgfreq = [.3, .2, .3, .2]
        kappa = 2.0
        # Row index is drawn before the column index.
        i = random.randint(0, 3)
        j = random.randint(0, 3)

        # Used both as the sampling interval and the finite-difference step.
        step = .01

        def entry(make_matrix, t):
            # Entry (i, j) of the matrix built by `make_matrix` at t.
            return make_matrix(bgfreq, kappa, t)[i][j]

        x = list(frange(0, 1.0, step))
        y = [entry(spidir.make_hky_matrix, t) for t in x]
        dy = [entry(spidir.make_hky_deriv_matrix, t) for t in x]
        dy2 = [(entry(spidir.make_hky_matrix, t + step) -
                entry(spidir.make_hky_matrix, t)) / step
               for t in x]

        prep_dir("test/output/hky")

        # The y-axis range covers the value and analytic-derivative curves.
        both = dy + y
        rplot_start("test/output/hky/hky_deriv.pdf")
        rplot("plot", x, y, t="l", ylim=[min(both), max(both)])
        rp.lines(x, dy, col="red")
        rp.lines(x, dy2, col="blue")
        rplot_end(True)
Example #8
0
        def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Return the sum over sites of safediv(ds, s, INF) for one branch.

            For site j,

                s = sum_k bgfreq[k] * (sum_x model1[k][x] * probs1[4*j + x])
                                    * (sum_y model2[k][y] * probs2[4*j + y])

            with model1 = make_hky_matrix(bgfreq, kappa, t) and
            model2 = make_hky_matrix(bgfreq, kappa, 0.0).  ds is the same
            expression with model1 replaced by
            make_hky_deriv_matrix(bgfreq, kappa, t).  Presumably that matrix
            is the derivative of model1 with respect to t, which would make
            ds / s the derivative of log(s) -- confirm against spidir.

            probs1 and probs2 are read as flat sequences holding 4 values per
            site (index 4 * j + state).
            """
            model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
            model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
            dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

            logl = 0.0
            for j in xrange(seqlen):
                base = 4 * j

                # The probs2 factor is the same in ds and s, so evaluate it
                # once per state k instead of once per sum.
                right = [sum(model2[k][y] * probs2[base + y]
                             for y in xrange(4))
                         for k in xrange(4)]

                ds = sum(bgfreq[k] * sum(dmodel1[k][x] * probs1[base + x]
                                         for x in xrange(4)) *
                         right[k] for k in xrange(4))

                s = sum(bgfreq[k] * sum(model1[k][x] * probs1[base + x]
                                        for x in xrange(4)) *
                        right[k] for k in xrange(4))

                logl += safediv(ds, s, INF)
            return logl
Example #9
0
    def test_hky_deriv(self):
        """Plot a random HKY matrix entry, its derivative and a finite difference.

        Writes test/output/hky/hky_deriv.pdf with the matrix entry over
        t in frange(0, 1.0, .01), the matching entry of
        make_hky_deriv_matrix in red, and a forward difference of the matrix
        entry (step .01) in blue.  Nothing is asserted.
        """

        bgfreq = [.3, .2, .3, .2]
        kappa = 2.0
        # Row index is drawn before the column index.
        i = random.randint(0, 3)
        j = random.randint(0, 3)

        x = list(frange(0, 1.0, .01))

        # Each curve is built in its own pass over x.
        y = []
        for t in x:
            y.append(spidir.make_hky_matrix(bgfreq, kappa, t)[i][j])

        dy = []
        for t in x:
            dy.append(spidir.make_hky_deriv_matrix(bgfreq, kappa, t)[i][j])

        dy2 = []
        for t in x:
            ahead = spidir.make_hky_matrix(bgfreq, kappa, t+.01)[i][j]
            here = spidir.make_hky_matrix(bgfreq, kappa, t)[i][j]
            dy2.append((ahead - here) / .01)

        prep_dir("test/output/hky")

        # The y-axis range covers the value and analytic-derivative curves.
        lo = min(dy + y)
        hi = max(dy + y)
        rplot_start("test/output/hky/hky_deriv.pdf")
        rplot("plot", x, y, t="l", ylim=[lo, hi])
        rp.lines(x, dy, col="red")
        rp.lines(x, dy2, col="blue")
        rplot_end(True)
Example #10
0
        def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
            """Return the sum over sites of -(dg/g)^2 + d2g/g for one branch.

            For site j,

                g = sum_k bgfreq[k] * (sum_x model1[k][x] * probs1[4*j + x])
                                    * (sum_y model2[k][y] * probs2[4*j + y])

            with model1 = make_hky_matrix(bgfreq, kappa, t) and
            model2 = make_hky_matrix(bgfreq, kappa, 0.0).  dg and d2g are the
            same expression with model1 replaced by the matrices from
            make_hky_deriv_matrix and make_hky_deriv2_matrix at t.
            Presumably those are the first and second derivatives of model1
            with respect to t, which would make each term the second
            derivative of log(g) -- confirm against spidir.

            probs1 and probs2 are read as flat sequences holding 4 values per
            site (index 4 * j + state).  Both divisions go through safediv
            with INF as the third argument.
            """
            model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
            model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
            dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
            d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

            logl = 0.0
            for j in xrange(seqlen):
                base = 4 * j

                # The probs2 factor is shared by g, dg and d2g; evaluate it
                # once per state k.
                right = [
                    sum(model2[k][y] * probs2[base + y] for y in xrange(4))
                    for k in xrange(4)
                ]

                g = sum(
                    bgfreq[k]
                    * sum(model1[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4)
                )

                dg = sum(
                    bgfreq[k]
                    * sum(dmodel1[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4)
                )

                d2g = sum(
                    bgfreq[k]
                    * sum(d2model1[k][x] * probs1[base + x] for x in xrange(4))
                    * right[k]
                    for k in xrange(4)
                )

                logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
            return logl
Example #11
0
    def test_JC(self):
        """Check the HKY matrix against the closed-form JC matrix.

        With uniform bgfreq and kappa = 1.0, every entry of
        make_hky_matrix is compared (via fequal) with a matrix that has r on
        the diagonal and s elsewhere, for t in frange(0, 1.0, .1).
        """

        bgfreq = [.25, .25, .25, .25]
        kappa = 1.0
        a = 1 / 3.0

        for t in frange(0, 1.0, .1):
            mat = spidir.make_hky_matrix(bgfreq, kappa, t)

            decay = exp(-4 * a * t)
            r = (1 / 4.0) * (1 + 3 * decay)   # diagonal entry
            s = (1 / 4.0) * (1 - decay)       # off-diagonal entry

            for i in xrange(4):
                for j in xrange(4):
                    expected = r if i == j else s
                    fequal(mat[i][j], expected)
Example #12
0
    def test1(self):

        # (0, 1):2
        # P(x_0,x_1|t) = \sum_{x_2} P(x_0|x_2,t_0) * P(x_1|x_2,t_1) * P(x_2)
        # P(x_0|x_1,t) = P(x_0,x_1|t) / P(x_1|t) = P(x_0,x_1|t) / P(x_1)

        for t in frange(0.1, 2.0, 0.1):
            bgfreq = [0.25, 0.25, 0.25, 0.25]
            kappa = 1.0
            tree = treelib.parse_newick("(a:%f,b:%f)" % (t * 0.5, t * 0.5))
            align = {"a": "C", "b": "T"}

            l = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa) - log(0.25)
            mat = spidir.make_hky_matrix(bgfreq, kappa, t)
            l2 = log(mat[0][1])

            print l, l2
            fequal(l, l2)
Example #13
0
    def test1(self):

        # (0, 1):2
        # P(x_0,x_1|t) = \sum_{x_2} P(x_0|x_2,t_0) * P(x_1|x_2,t_1) * P(x_2)
        # P(x_0|x_1,t) = P(x_0,x_1|t) / P(x_1|t) = P(x_0,x_1|t) / P(x_1)

        for t in frange(.1, 2.0, .1):
            bgfreq = [.25, .25, .25, .25]
            kappa = 1.0
            tree = treelib.parse_newick("(a:%f,b:%f)" % (t * .5, t * .5))
            align = {"a": "C", "b": "T"}

            l = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa) \
                - log(.25)
            mat = spidir.make_hky_matrix(bgfreq, kappa, t)
            l2 = log(mat[0][1])

            print l, l2
            fequal(l, l2)
Example #14
0
    def test_jc(self):
        """Check that HKY with uniform bgfreq and kappa = 1.0 matches JC.

        For t in frange(0, 1.0, .1), all 16 entries of make_hky_matrix are
        compared (fequal) with a matrix holding r on the diagonal and s
        everywhere else.
        """

        bgfreq = [.25, .25, .25, .25]
        kappa = 1.0

        for t in frange(0, 1.0, .1):
            mat = spidir.make_hky_matrix(bgfreq, kappa, t)

            a = 1/3.0
            decay = exp(-4*a*t)
            r = (1/4.0)*(1 + 3*decay)
            s = (1/4.0)*(1 - decay)

            # r on the diagonal, s elsewhere.
            mat2 = [[r if row == col else s for col in xrange(4)]
                    for row in xrange(4)]

            # Walk the 4x4 grid row by row with a single flat index.
            for cell in xrange(16):
                i, j = divmod(cell, 4)
                fequal(mat[i][j], mat2[i][j])