def test_branch_likelihood_hky(self):
    """Plot the HKY branch likelihood and its first two derivatives.

    Builds two random per-site base-probability vectors (4 entries per
    site) in which roughly a fraction ``div`` of the sites favour different
    bases, asks spidir for the maximum-likelihood distance between them,
    and writes three PDF plots under ``test/output/branch_likelihood/``:

    * the likelihood curve, with the grid argmax (blue) and the MLE (red)
      marked by vertical lines;
    * a forward-difference estimate of the first derivative, with the
      analytic first derivative overlaid in grey;
    * a forward difference of the analytic first derivative, with the
      analytic second derivative overlaid in grey.

    Nothing is asserted; the plots are meant for visual inspection.
    """
    # NOTE(review): a method with this same name is defined twice in this
    # file; if both live in one class the later definition wins -- confirm.

    # params
    bgfreq = [0.2, 0.3, 0.3, 0.2]
    kappa = 1.59
    seqlen = 100
    div = 0.1   # probability that a site gets mismatching bases
    t0 = 0      # search / plot interval for the distance
    t1 = 0.5
    h = 0.01    # grid spacing, also used as the finite-difference step

    # prep probabilities: 4 consecutive entries per site
    probs1 = []
    probs2 = []
    for _ in xrange(seqlen):
        if random.random() < div:
            # diverged site: the two sequences favour different bases
            probs1.extend([0.01] * 4)
            probs2.extend([0.01] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            k = (k % 4) + 1  # shift to a different one of the 4 slots
            probs2[-k] = 1.0
        else:
            # conserved site: both sequences favour the same base
            probs1.extend([0.2] * 4)
            probs2.extend([0.2] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            probs2[-k] = 1.0

    # shorthands that fix everything except the distance t
    def lk(t):
        return spidir.branch_likelihood_hky(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    def dlk(t):
        return spidir.branch_likelihood_hky_deriv(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    def d2lk(t):
        return spidir.branch_likelihood_hky_deriv2(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    # estimate MLE
    mle = spidir.mle_distance_hky(probs1, probs2, seqlen,
                                  bgfreq, kappa, t0, t1)

    # one grid shared by all three plots
    x = list(frange(t0, t1, h))
    y = [lk(t) for t in x]
    top = x[argmax(y)]

    # same output as the Python 2 statement `print div, top, mle`, but
    # also valid syntax under Python 3
    print("%s %s %s" % (div, top, mle))

    prep_dir("test/output/branch_likelihood/")

    # likelihood curve with grid argmax (blue) and MLE (red)
    rplot_start("test/output/branch_likelihood/branch_function.pdf")
    rplot("plot", x, y, t="l", xlab="distance", ylab="likelihood")
    rp.lines([top, top], [-1e300, 0], col="blue")
    rp.lines([mle, mle], [-1e300, 0], col="red")
    rplot_end(True)

    # ================================
    # 1st derivative: analytic (grey) vs. forward difference of lk
    dy = [dlk(t) for t in x]
    dy2 = [(lk(t + h) - lk(t)) / h for t in x]

    rplot_start("test/output/branch_likelihood/deriv_branch_function.pdf")
    rplot("plot", x, dy2, t="l", xlab="distance", ylab="d/dt likelihood")
    rp.lines([min(x), max(x)], [0, 0], col="black")
    rp.lines(x, dy, col="grey")
    rp.lines([top, top], [-1e300, 0], col="blue")
    rp.lines([mle, mle], [-1e300, 0], col="red")
    rplot_end(True)

    # =============================
    # 2nd derivative: analytic (grey) vs. forward difference of dlk
    d2y = [d2lk(t) for t in x]
    d2y2 = [(dlk(t + h) - dlk(t)) / h for t in x]

    rplot_start("test/output/branch_likelihood/deriv2_branch_function.pdf")
    rplot("plot", x, d2y2, t="l",
          xlab="distance", ylab="d^2/dt^2 likelihood")
    rp.lines([min(x), max(x)], [0, 0], col="black")
    rp.lines(x, d2y, col="grey")
    rplot_end(True)
def test_branch_likelihood_hky(self):
    """Plot the HKY branch likelihood and its first two derivatives.

    Builds two random per-site base-probability vectors (4 entries per
    site) in which roughly a fraction ``div`` of the sites favour different
    bases, asks spidir for the maximum-likelihood distance between them,
    and writes three PDF plots under ``test/output/branch_likelihood/``:

    * the likelihood curve, with the grid argmax (blue) and the MLE (red)
      marked by vertical lines;
    * a forward-difference estimate of the first derivative, with the
      analytic first derivative overlaid in grey;
    * a forward difference of the analytic first derivative, with the
      analytic second derivative overlaid in grey.

    Nothing is asserted; the plots are meant for visual inspection.
    """
    # NOTE(review): a method with this same name is defined twice in this
    # file; if both live in one class the later definition wins -- confirm.

    # params
    bgfreq = [0.2, 0.3, 0.3, 0.2]
    kappa = 1.59
    seqlen = 100
    div = 0.1   # probability that a site gets mismatching bases
    t0 = 0      # search / plot interval for the distance
    t1 = 0.5
    h = 0.01    # grid spacing, also used as the finite-difference step

    # prep probabilities: 4 consecutive entries per site
    probs1 = []
    probs2 = []
    for _ in xrange(seqlen):
        if random.random() < div:
            # diverged site: the two sequences favour different bases
            probs1.extend([0.01] * 4)
            probs2.extend([0.01] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            k = (k % 4) + 1  # shift to a different one of the 4 slots
            probs2[-k] = 1.0
        else:
            # conserved site: both sequences favour the same base
            probs1.extend([0.2] * 4)
            probs2.extend([0.2] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            probs2[-k] = 1.0

    # shorthands that fix everything except the distance t
    def lk(t):
        return spidir.branch_likelihood_hky(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    def dlk(t):
        return spidir.branch_likelihood_hky_deriv(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    def d2lk(t):
        return spidir.branch_likelihood_hky_deriv2(
            probs1, probs2, seqlen, bgfreq, kappa, t)

    # estimate MLE
    mle = spidir.mle_distance_hky(probs1, probs2, seqlen,
                                  bgfreq, kappa, t0, t1)

    # one grid shared by all three plots
    x = list(frange(t0, t1, h))
    y = [lk(t) for t in x]
    top = x[argmax(y)]

    # same output as the Python 2 statement `print div, top, mle`, but
    # also valid syntax under Python 3
    print("%s %s %s" % (div, top, mle))

    prep_dir("test/output/branch_likelihood/")

    # likelihood curve with grid argmax (blue) and MLE (red)
    rplot_start("test/output/branch_likelihood/branch_function.pdf")
    rplot("plot", x, y, t="l", xlab="distance", ylab="likelihood")
    rp.lines([top, top], [-1e300, 0], col="blue")
    rp.lines([mle, mle], [-1e300, 0], col="red")
    rplot_end(True)

    # ================================
    # 1st derivative: analytic (grey) vs. forward difference of lk
    dy = [dlk(t) for t in x]
    dy2 = [(lk(t + h) - lk(t)) / h for t in x]

    rplot_start("test/output/branch_likelihood/deriv_branch_function.pdf")
    rplot("plot", x, dy2, t="l", xlab="distance", ylab="d/dt likelihood")
    rp.lines([min(x), max(x)], [0, 0], col="black")
    rp.lines(x, dy, col="grey")
    rp.lines([top, top], [-1e300, 0], col="blue")
    rp.lines([mle, mle], [-1e300, 0], col="red")
    rplot_end(True)

    # =============================
    # 2nd derivative: analytic (grey) vs. forward difference of dlk
    d2y = [d2lk(t) for t in x]
    d2y2 = [(dlk(t + h) - dlk(t)) / h for t in x]

    rplot_start("test/output/branch_likelihood/deriv2_branch_function.pdf")
    rplot("plot", x, d2y2, t="l",
          xlab="distance", ylab="d^2/dt^2 likelihood")
    rp.lines([min(x), max(x)], [0, 0], col="black")
    rp.lines(x, d2y, col="grey")
    rplot_end(True)
def _test_calc_lktable_row(self):
    """Compare spidir's HKY branch likelihood with a Python reference.

    Defines pure-Python versions of the branch likelihood and of its first
    and second derivatives with respect to the distance ``t`` (built from
    ``spidir.make_hky_matrix`` and its derivative matrices), evaluates
    them and the corresponding ``spidir.branch_likelihood_hky*`` functions
    on a grid of distances over random site probabilities, and writes one
    overlay plot per quantity (spidir in black, reference in red) under
    ``test/output/branch_likelihood_simple/``.

    Nothing is asserted; the plots are meant for visual inspection.
    """
    # NOTE(review): the leading underscore presumably keeps the test
    # runner from collecting this method, and the name refers to
    # CalcLktableRow although the body only exercises the branch
    # likelihood functions -- confirm both are intended.

    def site_term(bgfreq, mat1, mat2, probs1, probs2, j):
        """Return sum_k bgfreq[k] * (mat1[k] . site j of probs1)
        * (mat2[k] . site j of probs2)."""
        return sum(
            bgfreq[k]
            * sum(mat1[k][x] * probs1[4 * j + x] for x in xrange(4))
            * sum(mat2[k][y] * probs2[4 * j + y] for y in xrange(4))
            for k in xrange(4))

    def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference value: sum over sites of safelog(site term, e)."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0)

        logl = 0.0
        for j in xrange(seqlen):
            s = site_term(bgfreq, model1, model2, probs1, probs2, j)
            logl += safelog(s, e)
        return logl

    def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference first derivative in t: sum over sites of g'/g."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
        dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

        logl = 0.0
        for j in xrange(seqlen):
            # only model1 depends on t, so only it is differentiated
            ds = site_term(bgfreq, dmodel1, model2, probs1, probs2, j)
            s = site_term(bgfreq, model1, model2, probs1, probs2, j)
            logl += safediv(ds, s, INF)
        return logl

    def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference second derivative in t: sum of g''/g - (g'/g)^2."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
        dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
        d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

        logl = 0.0
        for j in xrange(seqlen):
            g = site_term(bgfreq, model1, model2, probs1, probs2, j)
            dg = site_term(bgfreq, dmodel1, model2, probs1, probs2, j)
            d2g = site_term(bgfreq, d2model1, model2, probs1, probs2, j)
            logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
        return logl

    bgfreq = [0.25, 0.25, 0.25, 0.25]
    kappa = 1.59
    seqlen = 100

    # prep probabilities: 4 consecutive entries per site
    div = 0.1  # probability that a site gets mismatching bases
    probs1 = []
    probs2 = []
    for _ in xrange(seqlen):
        if random.random() < div:
            # diverged site: the two sequences favour different bases
            probs1.extend([0.01] * 4)
            probs2.extend([0.01] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            k = (k % 4) + 1  # shift to a different one of the 4 slots
            probs2[-k] = 1.0
        else:
            # conserved site: both sequences favour the same base
            probs1.extend([0.2] * 4)
            probs2.extend([0.2] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            probs2[-k] = 1.0

    # one grid shared by all three comparisons
    x = list(frange(0, 1.0, 0.01))

    prep_dir("test/output/branch_likelihood_simple/")

    # (spidir function, Python reference, output plot)
    comparisons = [
        (spidir.branch_likelihood_hky, branchlk,
         "test/output/branch_likelihood_simple/cmp_c_py.pdf"),
        (spidir.branch_likelihood_hky_deriv, dbranchlk,
         "test/output/branch_likelihood_simple/cmp_c_py_deriv.pdf"),
        (spidir.branch_likelihood_hky_deriv2, d2branchlk,
         "test/output/branch_likelihood_simple/cmp_c_py_deriv2.pdf"),
    ]
    for cfunc, pyfunc, pdf in comparisons:
        y = [cfunc(probs1, probs2, seqlen, bgfreq, kappa, t) for t in x]
        y2 = [pyfunc(probs1, probs2, seqlen, bgfreq, kappa, t) for t in x]

        rplot_start(pdf)
        rplot("plot", x, y, t="l")
        rp.lines(x, y2, t="l", col="red")
        rplot_end(True)
def _test_calc_lktable_row(self):
    """Compare spidir's HKY branch likelihood with a Python reference.

    Defines pure-Python versions of the branch likelihood and of its first
    and second derivatives with respect to the distance ``t`` (built from
    ``spidir.make_hky_matrix`` and its derivative matrices), evaluates
    them and the corresponding ``spidir.branch_likelihood_hky*`` functions
    on a grid of distances over random site probabilities, and writes one
    overlay plot per quantity (spidir in black, reference in red) under
    ``test/output/branch_likelihood_simple/``.

    Nothing is asserted; the plots are meant for visual inspection.
    """
    # NOTE(review): the leading underscore presumably keeps the test
    # runner from collecting this method, and the name refers to
    # CalcLktableRow although the body only exercises the branch
    # likelihood functions -- confirm both are intended.

    def site_term(bgfreq, mat1, mat2, probs1, probs2, j):
        """Return sum_k bgfreq[k] * (mat1[k] . site j of probs1)
        * (mat2[k] . site j of probs2)."""
        return sum(
            bgfreq[k]
            * sum(mat1[k][x] * probs1[4 * j + x] for x in xrange(4))
            * sum(mat2[k][y] * probs2[4 * j + y] for y in xrange(4))
            for k in xrange(4))

    def branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference value: sum over sites of safelog(site term, e)."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0)

        logl = 0.0
        for j in xrange(seqlen):
            s = site_term(bgfreq, model1, model2, probs1, probs2, j)
            logl += safelog(s, e)
        return logl

    def dbranchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference first derivative in t: sum over sites of g'/g."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
        dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)

        logl = 0.0
        for j in xrange(seqlen):
            # only model1 depends on t, so only it is differentiated
            ds = site_term(bgfreq, dmodel1, model2, probs1, probs2, j)
            s = site_term(bgfreq, model1, model2, probs1, probs2, j)
            logl += safediv(ds, s, INF)
        return logl

    def d2branchlk(probs1, probs2, seqlen, bgfreq, kappa, t):
        """Reference second derivative in t: sum of g''/g - (g'/g)^2."""
        model1 = spidir.make_hky_matrix(bgfreq, kappa, t)
        model2 = spidir.make_hky_matrix(bgfreq, kappa, 0.0)
        dmodel1 = spidir.make_hky_deriv_matrix(bgfreq, kappa, t)
        d2model1 = spidir.make_hky_deriv2_matrix(bgfreq, kappa, t)

        logl = 0.0
        for j in xrange(seqlen):
            g = site_term(bgfreq, model1, model2, probs1, probs2, j)
            dg = site_term(bgfreq, dmodel1, model2, probs1, probs2, j)
            d2g = site_term(bgfreq, d2model1, model2, probs1, probs2, j)
            logl += -safediv(dg * dg, g * g, INF) + safediv(d2g, g, INF)
        return logl

    bgfreq = [0.25, 0.25, 0.25, 0.25]
    kappa = 1.59
    seqlen = 100

    # prep probabilities: 4 consecutive entries per site
    div = 0.1  # probability that a site gets mismatching bases
    probs1 = []
    probs2 = []
    for _ in xrange(seqlen):
        if random.random() < div:
            # diverged site: the two sequences favour different bases
            probs1.extend([0.01] * 4)
            probs2.extend([0.01] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            k = (k % 4) + 1  # shift to a different one of the 4 slots
            probs2[-k] = 1.0
        else:
            # conserved site: both sequences favour the same base
            probs1.extend([0.2] * 4)
            probs2.extend([0.2] * 4)
            k = random.randint(1, 4)
            probs1[-k] = 1.0
            probs2[-k] = 1.0

    # one grid shared by all three comparisons
    x = list(frange(0, 1.0, 0.01))

    prep_dir("test/output/branch_likelihood_simple/")

    # (spidir function, Python reference, output plot)
    comparisons = [
        (spidir.branch_likelihood_hky, branchlk,
         "test/output/branch_likelihood_simple/cmp_c_py.pdf"),
        (spidir.branch_likelihood_hky_deriv, dbranchlk,
         "test/output/branch_likelihood_simple/cmp_c_py_deriv.pdf"),
        (spidir.branch_likelihood_hky_deriv2, d2branchlk,
         "test/output/branch_likelihood_simple/cmp_c_py_deriv2.pdf"),
    ]
    for cfunc, pyfunc, pdf in comparisons:
        y = [cfunc(probs1, probs2, seqlen, bgfreq, kappa, t) for t in x]
        y2 = [pyfunc(probs1, probs2, seqlen, bgfreq, kappa, t) for t in x]

        rplot_start(pdf)
        rplot("plot", x, y, t="l")
        rp.lines(x, y2, t="l", col="red")
        rplot_end(True)