def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Return the sum over sites of ds / s for a branch of length t.

    For every site j in 0..seqlen-1 two sums over k in 0..3 are formed:

      s  = sum_k bgfreq[k] * (model1[k] . probs1[4j:4j+4])
                           * (model2[k] . probs2[4j:4j+4])
      ds = the same expression with model1 replaced by dmodel1

    model1 and dmodel1 come from spidir.make_hky_matrix and
    spidir.make_hky_deriv_matrix evaluated at t; model2 is
    spidir.make_hky_matrix evaluated at 0.0.  Each per-site ratio is taken
    with safediv(ds, s, INF) and the ratios are added up.

    probs1 and probs2 are read as flat sequences with four consecutive
    entries per site.  Presumably the result is d/dt of the quantity summed
    by branchlk -- confirm against make_hky_deriv_matrix.
    """
    model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
    model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
    # Only the t-dependent side is differentiated, so no derivative matrix
    # is needed for the zero-length side.
    dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

    states = range(4)
    logl = 0.0
    for j in xrange(seqlen):
        base = 4 * j
        # The probs2 factor is the same in ds and s: compute it once per k.
        side2 = [sum(model2[k][y] * probs2[base + y] for y in states)
                 for k in states]
        ds = sum(bgfreq[k] *
                 sum(dmodel1[k][x] * probs1[base + x] for x in states) *
                 side2[k]
                 for k in states)
        s = sum(bgfreq[k] *
                sum(model1[k][x] * probs1[base + x] for x in states) *
                side2[k]
                for k in states)
        logl += safediv(ds, s, INF)
    return logl
def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Return the sum over sites of d2g / g - (dg / g)**2 for branch length t.

    For every site j in 0..seqlen-1 three sums over k in 0..3 are formed:

      g   = sum_k bgfreq[k] * (model1[k]   . probs1[4j:4j+4]) * side2[k]
      dg  = sum_k bgfreq[k] * (dmodel1[k]  . probs1[4j:4j+4]) * side2[k]
      d2g = sum_k bgfreq[k] * (d2model1[k] . probs1[4j:4j+4]) * side2[k]

    with side2[k] = model2[k] . probs2[4j:4j+4].  model1, dmodel1 and
    d2model1 are spidir.make_hky_matrix, make_hky_deriv_matrix and
    make_hky_deriv2_matrix evaluated at t; model2 is make_hky_matrix
    evaluated at 0.0.  Both quotients go through safediv(..., INF).

    probs1 and probs2 are read as flat sequences with four consecutive
    entries per site.  Presumably the result is the second derivative in t
    of the quantity summed by branchlk -- confirm against the spidir
    derivative helpers.
    """
    model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
    model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
    # Derivative matrices are only needed on the t-dependent side.
    dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
    d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

    states = range(4)
    logl = 0.0
    for j in xrange(seqlen):
        base = 4 * j
        # Shared by g, dg and d2g: compute once per k instead of three times.
        side2 = [sum(model2[k][y] * probs2[base + y] for y in states)
                 for k in states]
        g = sum(bgfreq[k] *
                sum(model1[k][x] * probs1[base + x] for x in states) *
                side2[k]
                for k in states)
        dg = sum(bgfreq[k] *
                 sum(dmodel1[k][x] * probs1[base + x] for x in states) *
                 side2[k]
                 for k in states)
        d2g = sum(bgfreq[k] *
                  sum(d2model1[k][x] * probs1[base + x] for x in states) *
                  side2[k]
                  for k in states)
        # NOTE(review): if safediv falls back to INF for both quotients
        # (presumably when g is zero -- confirm), this adds -INF + INF.
        logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
    return logl
def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Return the sum over sites of safelog(s, e) for a branch of length t.

    For site j, s is the sum over k in 0..3 of

        bgfreq[k] * (P_t[k] . probs1[4j:4j+4]) * (P_0[k] . probs2[4j:4j+4])

    where P_t and P_0 are spidir.make_hky_matrix evaluated at t and at 0.
    probs1 and probs2 are read as flat sequences with four consecutive
    entries per site.
    """
    at_t = spidir.make_hky_matrix(bgfreq, kappa, t)
    at_zero = spidir.make_hky_matrix(bgfreq, kappa, 0)

    def site_term(site):
        # Weighted sum over the four values of k for one site.
        start = 4 * site
        return sum(
            bgfreq[k]
            * sum(at_t[k][x] * probs1[start + x] for x in xrange(4))
            * sum(at_zero[k][y] * probs2[start + y] for y in xrange(4))
            for k in xrange(4))

    total = 0.0
    for site in xrange(seqlen):
        total += safelog(site_term(site), e)
    return total
def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Accumulate safelog(s, e) over all sites for branch length t.

    The per-site value s combines, for each k in 0..3, bgfreq[k] with the
    dot product of row k of spidir.make_hky_matrix(bgfreq, kappa, t) and the
    site's four probs1 entries, and the dot product of row k of
    spidir.make_hky_matrix(bgfreq, kappa, 0) and the site's four probs2
    entries.  Both probability arguments are indexed as 4 * j + offset.
    """
    branch_matrix = spidir.make_hky_matrix(bgfreq, kappa, t)
    zero_matrix = spidir.make_hky_matrix(bgfreq, kappa, 0)

    result = 0.0
    for pos in xrange(seqlen):
        first = 4 * pos
        site_value = sum(
            bgfreq[state]
            * sum(branch_matrix[state][a] * probs1[first + a]
                  for a in xrange(4))
            * sum(zero_matrix[state][b] * probs2[first + b]
                  for b in xrange(4))
            for state in xrange(4)
        )
        result += safelog(site_value, e)
    return result
def test_hky(self):
    """Pretty-print spidir.make_hky_matrix for one fixed parameter set.

    Nothing is asserted; the matrix is only printed.
    """
    freqs = [.3, .2, .3, .2]
    ratio = 2.0
    branch = 0.2

    matrix = spidir.make_hky_matrix(freqs, ratio, branch)
    pprint(matrix)
def test_hky(self):
    """Pretty-print the HKY matrix from spidir and then the one from phylo.

    Both are built from the same frequencies, kappa and t.  Nothing is
    asserted; the two matrices are only printed, one after the other.
    """
    freqs = [.3, .2, .3, .2]
    ratio = 2.0
    branch = 0.2

    # spidir takes (bgfreq, kappa, t); phylo takes (t, bgfreq, kappa).
    from_spidir = spidir.make_hky_matrix(freqs, ratio, branch)
    pprint(from_spidir)

    from_phylo = phylo.make_hky_matrix(branch, freqs, ratio)
    pprint(from_phylo)
def test_hky_deriv(self):
    """Plot one HKY matrix entry against its derivative estimates.

    A random entry [i][j] is picked.  Over t in frange(0, 1.0, .01) the plot
    in test/output/hky/hky_deriv.pdf shows the entry of
    spidir.make_hky_matrix, the same entry of spidir.make_hky_deriv_matrix
    (red) and a forward finite difference with step .01 (blue).  Nothing is
    asserted.
    """
    bgfreq = [.3, .2, .3, .2]
    kappa = 2.0
    step = .01

    i = random.randint(0, 3)
    j = random.randint(0, 3)

    def entry(t):
        # Entry [i][j] of the HKY matrix at t.
        return spidir.make_hky_matrix(bgfreq, kappa, t)[i][j]

    def deriv_entry(t):
        # Entry [i][j] of the HKY derivative matrix at t.
        return spidir.make_hky_deriv_matrix(bgfreq, kappa, t)[i][j]

    x = list(frange(0, 1.0, step))
    y = [entry(t) for t in x]
    dy = [deriv_entry(t) for t in x]
    dy2 = [(entry(t + step) - entry(t)) / step for t in x]

    # The y-range has to cover both the entry and its derivative.
    combined = dy + y
    y_limits = [min(combined), max(combined)]

    prep_dir("test/output/hky")
    rplot_start("test/output/hky/hky_deriv.pdf")
    rplot("plot", x, y, t="l", ylim=y_limits)
    rp.lines(x, dy, col="red")
    rp.lines(x, dy2, col="blue")
    rplot_end(True)
def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Sum safediv(ds, s, INF) over all sites for branch length t.

    Per site j, both ds and s have the form

        sum_k bgfreq[k] * (M[k] . probs1[4j:4j+4])
                        * (model2[k] . probs2[4j:4j+4])

    with M = spidir.make_hky_deriv_matrix(bgfreq, kappa, t) for ds and
    M = spidir.make_hky_matrix(bgfreq, kappa, t) for s.  model2 is
    spidir.make_hky_matrix(bgfreq, kappa, 0.0).

    probs1 and probs2 are indexed as flat sequences, four entries per site.
    Presumably this is the derivative in t of what branchlk sums -- confirm
    against make_hky_deriv_matrix.
    """
    model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
    model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
    # The zero-length side is never differentiated, so only one derivative
    # matrix is built.
    dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

    def weighted_sum(matrix, offset):
        # Shared form of ds and s; only the probs1-side matrix differs.
        return sum(
            bgfreq[k]
            * sum(matrix[k][x] * probs1[offset + x] for x in xrange(4))
            * sum(model2[k][y] * probs2[offset + y] for y in xrange(4))
            for k in xrange(4))

    logl = 0.0
    for j in xrange(seqlen):
        offset = 4 * j
        ds = weighted_sum(dmodel1, offset)
        s = weighted_sum(model1, offset)
        logl += safediv(ds, s, INF)
    return logl
def test_hky_deriv(self):
    """Draw an HKY matrix entry with its analytic and numeric derivative.

    Entry [i][j] is chosen at random.  For every t in frange(0, 1.0, .01)
    the entry of spidir.make_hky_matrix is plotted as a line, the entry of
    spidir.make_hky_deriv_matrix is overlaid in red, and the forward
    difference quotient with step .01 is overlaid in blue.  Output goes to
    test/output/hky/hky_deriv.pdf.  Nothing is asserted.
    """
    bgfreq = [.3, .2, .3, .2]
    kappa = 2.0
    i = random.randint(0, 3)
    j = random.randint(0, 3)

    times = list(frange(0, 1.0, .01))

    values = []
    for t in times:
        values.append(spidir.make_hky_matrix(bgfreq, kappa, t)[i][j])

    analytic = []
    for t in times:
        analytic.append(spidir.make_hky_deriv_matrix(bgfreq, kappa, t)[i][j])

    numeric = []
    for t in times:
        ahead = spidir.make_hky_matrix(bgfreq, kappa, t + .01)[i][j]
        here = spidir.make_hky_matrix(bgfreq, kappa, t)[i][j]
        numeric.append((ahead - here) / .01)

    # Axis limits span the analytic derivative and the entry itself.
    pooled = analytic + values

    prep_dir("test/output/hky")
    rplot_start("test/output/hky/hky_deriv.pdf")
    rplot("plot", times, values, t="l", ylim=[min(pooled), max(pooled)])
    rp.lines(times, analytic, col="red")
    rp.lines(times, numeric, col="blue")
    rplot_end(True)
def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
    """Sum d2g / g - dg**2 / g**2 over all sites for branch length t.

    Per site j, g, dg and d2g all have the form

        sum_k bgfreq[k] * (M[k] . probs1[4j:4j+4])
                        * (model2[k] . probs2[4j:4j+4])

    where M is spidir.make_hky_matrix, make_hky_deriv_matrix or
    make_hky_deriv2_matrix evaluated at t, and model2 is
    spidir.make_hky_matrix evaluated at 0.0.  Both quotients are taken with
    safediv(..., INF).

    probs1 and probs2 are indexed as flat sequences, four entries per site.
    Presumably this is the second derivative in t of what branchlk sums --
    confirm against the spidir derivative helpers.
    """
    model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
    model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
    # Derivatives are only taken on the t-dependent side, so no derivative
    # matrices are built at 0.0.
    dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
    d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

    def weighted_sum(matrix, offset):
        # Shared form of g, dg and d2g; only the probs1-side matrix differs.
        return sum(
            bgfreq[k]
            * sum(matrix[k][x] * probs1[offset + x] for x in xrange(4))
            * sum(model2[k][y] * probs2[offset + y] for y in xrange(4))
            for k in xrange(4))

    logl = 0.0
    for j in xrange(seqlen):
        offset = 4 * j
        g = weighted_sum(model1, offset)
        dg = weighted_sum(dmodel1, offset)
        d2g = weighted_sum(d2model1, offset)
        logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
    return logl
def test_JC(self):
    """Check HKY with uniform frequencies and kappa = 1 against JC.

    For each t in frange(0, 1.0, .1), every entry of
    spidir.make_hky_matrix is compared through fequal with the closed form
    built here: r on the diagonal and s everywhere else.
    """
    bgfreq = [.25, .25, .25, .25]
    kappa = 1.0
    a = 1 / 3.0

    for t in frange(0, 1.0, .1):
        mat = spidir.make_hky_matrix(bgfreq, kappa, t)

        decay = exp(-4 * a * t)
        r = (1 / 4.0) * (1 + 3 * decay)
        s = (1 / 4.0) * (1 - decay)
        expected = [[r if row == col else s for col in xrange(4)]
                    for row in xrange(4)]

        for row in xrange(4):
            for col in xrange(4):
                fequal(mat[row][col], expected[row][col])
def test1(self):
    """Compare a two-leaf tree likelihood with an HKY matrix entry.

    For each t in frange(0.1, 2.0, 0.1) a tree "(a:t/2,b:t/2)" with the
    one-column alignment a="C", b="T" is scored with
    spidir.calc_seq_likelihood_hky.  After subtracting log(0.25), the score
    must match log(mat[0][1]) under fequal, where mat is
    spidir.make_hky_matrix evaluated at t.
    """
    # (0, 1):2
    # P(x_0,x_1|t) = \sum_{x_2} P(x_0|x_2,t_0) * P(x_1|x_2,t_1) * P(x_2)
    # P(x_0|x_1,t) = P(x_0,x_1|t) / P(x_1|t) = P(x_0,x_1|t) / P(x_1)

    # Model parameters and alignment do not depend on t.
    bgfreq = [0.25, 0.25, 0.25, 0.25]
    kappa = 1.0
    align = {"a": "C", "b": "T"}

    for t in frange(0.1, 2.0, 0.1):
        tree = treelib.parse_newick("(a:%f,b:%f)" % (t * 0.5, t * 0.5))

        tree_logl = spidir.calc_seq_likelihood_hky(
            tree, align, bgfreq, kappa) - log(0.25)

        mat = spidir.make_hky_matrix(bgfreq, kappa, t)
        matrix_logl = log(mat[0][1])

        # Single-argument print: valid syntax on Python 2 and 3, and emits
        # the same "value value" text as the old print statement.
        print("%s %s" % (tree_logl, matrix_logl))
        fequal(tree_logl, matrix_logl)
def test1(self):
    """Check the two-leaf sequence likelihood against the HKY matrix.

    Each t in frange(.1, 2.0, .1) is split evenly over the two branches of
    "(a:...,b:...)".  With alignment a="C", b="T", the value of
    spidir.calc_seq_likelihood_hky minus log(.25) is compared through
    fequal with log(mat[0][1]), where mat is spidir.make_hky_matrix at t.
    """
    # (0, 1):2
    # P(x_0,x_1|t) = \sum_{x_2} P(x_0|x_2,t_0) * P(x_1|x_2,t_1) * P(x_2)
    # P(x_0|x_1,t) = P(x_0,x_1|t) / P(x_1|t) = P(x_0,x_1|t) / P(x_1)

    for t in frange(.1, 2.0, .1):
        bgfreq = [.25, .25, .25, .25]
        kappa = 1.0

        half = t * .5
        tree = treelib.parse_newick("(a:%f,b:%f)" % (half, half))
        align = {"a": "C", "b": "T"}

        joint = spidir.calc_seq_likelihood_hky(tree, align, bgfreq, kappa)
        conditional = joint - log(.25)

        mat = spidir.make_hky_matrix(bgfreq, kappa, t)
        expected = log(mat[0][1])

        # Formatted single-argument print parses on Python 2 and 3 alike
        # and produces the same output as "print a, b" did.
        print("%s %s" % (conditional, expected))
        fequal(conditional, expected)
def test_jc(self):
    """Verify that the HKY matrix reduces to the JC closed form.

    Uniform base frequencies and kappa = 1.0 are used.  For every t in
    frange(0, 1.0, .1) each entry of spidir.make_hky_matrix is passed to
    fequal together with the diagonal value r or the off-diagonal value s
    computed here.
    """
    uniform = [.25, .25, .25, .25]
    kappa = 1.0

    for t in frange(0, 1.0, .1):
        mat = spidir.make_hky_matrix(uniform, kappa, t)

        a = 1/3.0
        damp = exp(-4*a*t)
        on_diag = (1/4.0)*(1 + 3*damp)
        off_diag = (1/4.0)*(1 - damp)

        for i in xrange(4):
            for j in xrange(4):
                if i == j:
                    want = on_diag
                else:
                    want = off_diag
                fequal(mat[i][j], want)