def dup_loss_topology_prior(tree, stree, recon, birth, death, maxdoom=20,
                            events=None):
    """
    Compute the log prior of a gene tree topology under the dup-loss model.

    tree    -- gene tree; implied speciation nodes are inserted while the
               prior is evaluated and single-child nodes are stripped again
               before returning
    stree   -- species tree
    recon   -- reconciliation, indexed by gene node, giving a species node
    birth   -- birth parameter handed to calc_doom_table and
               birthdeath.prob_birth_death1
    death   -- death parameter handed to the same two functions
    maxdoom -- the per-branch sum runs over 0..maxdoom extra lineages
    events  -- event label per gene node; computed with phylo.label_events
               when not supplied
    """

    def gene2species(gene):
        return recon[tree.nodes[gene]].name

    if events is None:
        events = phylo.label_events(tree, recon)

    # Record the leaves before implied speciation nodes are added to the tree.
    original_leaves = set(tree.leaves())
    phylo.add_implied_spec_nodes(tree, stree, recon, events)

    _pstree, _snodes, snodelookup = spidir.make_ptree(stree)
    doomtable = calc_doom_table(stree, birth, death, maxdoom)

    logp = 0.0
    for gnode in tree:
        if events[gnode] != "spec":
            continue

        for schild in recon[gnode].children:
            # Gene children of this speciation that map to this species child.
            matching = [c for c in gnode.children if recon[c] == schild]

            if matching:
                top = matching[0]
                subleaves = get_sub_tree(top, schild, recon, events)
                nhist = birthdeath.num_topology_histories(top, subleaves)
                s = len(subleaves)
                thist = (stats.factorial(s) * stats.factorial(s - 1)
                         / 2 ** (s - 1))

                # True when none of the subtree's leaves is an original
                # leaf of the gene tree.
                is_internal = len(set(subleaves) & original_leaves) == 0
                logp += log(num_redundant_topology(
                    top, gene2species, subleaves, is_internal))
            else:
                # No gene lineage found in this species child.
                nhist = 1.0
                thist = 1.0
                s = 0

            t = sum(
                stats.choose(s + extra, extra)
                * birthdeath.prob_birth_death1(
                    s + extra, schild.dist, birth, death)
                * exp(doomtable[snodelookup[schild]]) ** extra
                for extra in range(maxdoom + 1)
            )

            logp += log(nhist) - log(thist) + log(t)

    # correct for renumbering
    nt = num_redundant_topology(tree.root, gene2species)
    logp -= log(nt)

    treelib.remove_single_children(tree)

    return logp
def gegenbauer3(n, a, z):
    """
    Evaluate the finite sum

        sum_{k=0}^{floor(n/2)} (-1)^k * Gamma(n - k + a)
            / (Gamma(a) * k! * (n - 2k)!) * (2z)^(n - 2k)

    which is the standard explicit representation of the Gegenbauer
    polynomial of degree 'n' and parameter 'a' evaluated at 'z'.

    Gamma and factorial values are taken from the stats module.
    """
    tot = 0
    # Explicit floor division keeps the upper limit identical on Python 2
    # and Python 3; range() exists on both, unlike xrange().
    for k in range(int(n // 2) + 1):
        tot += ((-1) ** k * stats.gamma(n - k + a) / (
            stats.gamma(a) * stats.factorial(k) * stats.factorial(n - 2 * k))
            * ((2 * z) ** (n - 2 * k)))
    return tot
def prob_coal_counts(u, v, t, n):
    """
    The probability of going from 'u' lineages to 'v' lineages in time 't'
    with population size 'n'

    The result is the sum over k = v..u of

        exp(-k(k-1)T/2) * (2k-1) * (-1)^(k-v) / (v! (k-v)! (k+v-1))
            * prod_{y=0}^{k-1} (v+y)(u-y)/(u+y)

    with T = t / n.
    """
    # Force true division: with integer 't' and 'n' Python 2 would
    # truncate the scaled time (e.g. 1 / 2 == 0).
    T = t / float(n)

    s = 0.0
    for k in range(v, u + 1):
        a = (exp(-k * (k - 1) * T / 2.0) * (2 * k - 1) * (-1) ** (k - v)
             / stats.factorial(v) / stats.factorial(k - v) / (k + v - 1)
             # float() keeps each ratio from being truncated to an integer
             # on Python 2, matching prob_coal_counts_slow.
             * stats.prod((v + y) * (u - y) / float(u + y)
                          for y in range(k)))
        s += a
    return s
def prob_coal_counts_slow(a, b, t, n):
    """
    Probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'.

    Direct term-by-term evaluation of the sum over k = b..a; slower than
    the alternative implementation but handy as a reference when testing.
    """
    total = 0.0
    for k in range(b, a + 1):
        # Build the k-th term factor by factor, in the same left-to-right
        # order as the closed-form expression.
        term = exp(-k * (k - 1) * t / 2.0 / n)
        term = term * float(2 * k - 1)
        term = term * (-1) ** (k - b)
        term = term / stats.factorial(b)
        term = term / stats.factorial(k - b)
        term = term / (k + b - 1)
        term = term * stats.prod(
            (b + y) * (a - y) / float(a + y) for y in range(k))
        total += term
    return total
def legendre_poly(n):
    r"""
    Build the degree-'n' Legendre polynomial from the formula

        \frac{1}{2^n n!} d^n/dx^n [(x^2 - 1)^n]

    The polynomial is assembled as a nested-tuple expression in the
    variable 'x', differentiated 'n' times with derivate() and passed
    through simplify(); the simplified expression is returned.
    """
    # NOTE: the docstring is a raw string so that "\frac" is kept verbatim
    # instead of being read as a form-feed escape followed by "rac".

    # 1 / (2^n n!)
    coeff = ('scalar', 1.0 / (2 ** n * stats.factorial(n)))

    # (x^2 - 1)^n
    x_squared_minus_one = ('add',
                           ('power', ('var', 'x'), ('scalar', 2)),
                           ('scalar', -1))
    powered = ('power', x_squared_minus_one, ('scalar', n))

    return simplify(('mult', coeff, derivate(powered, 'x', n)))
def prob_coal_counts_slow(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    Implemented more directly, but slower.  Good for testing against.
    """
    s = 0.0
    # range() rather than xrange(): the latter does not exist on Python 3.
    for k in range(b, a + 1):
        # Product of the per-lineage ratios; float() avoids integer
        # truncation of each ratio on Python 2.
        ratio_product = stats.prod(
            (b + y) * (a - y) / float(a + y) for y in range(k))

        s += (
            exp(-k * (k - 1) * t / 2.0 / n)
            * float(2 * k - 1)
            * (-1) ** (k - b)
            / stats.factorial(b)
            / stats.factorial(k - b)
            / (k + b - 1)
            * ratio_product
        )
    return s
def num_redundant_topology(node, gene2species, leaves=None, all_leaves=False):
    """
    Returns the number of 'redundant' topologies

    node         -- root of the subtree to examine
    gene2species -- mapping function passed through to phylo.hash_tree
    leaves       -- nodes treated as the leaves of the subtree
                    (default: node.leaves())
    all_leaves   -- if true, the count starts from factorial(len(leaves))
                    instead of 1

    Each leaf is coloured with phylo.hash_tree and each internal node with
    the composition of its children's sorted colours.  The starting count
    is multiplied by factorial(size) for every group of equally coloured
    leaves and divided by 2 for every internal node whose children all
    share one colour.
    """
    if leaves is None:
        leaves = node.leaves()
    leaf_set = set(leaves)

    colors = {}

    def color_subtree(current):
        """Colour 'current' and its descendants; return the mirror count."""
        if current in leaf_set:
            colors[current] = phylo.hash_tree(current, gene2species)
            return 0

        mirrors = 0
        for child in current.children:
            mirrors += color_subtree(child)

        child_hashes = util.mget(colors, current.children)
        # A node whose children all carry the same colour is a mirror.
        if len(child_hashes) > 1 and util.equal(*child_hashes):
            mirrors += 1

        child_hashes.sort()
        colors[current] = phylo.hash_tree_compose(child_hashes)
        return mirrors

    nmirrors = color_subtree(node)

    group_sizes = util.hist_dict(util.mget(colors, leaf_set)).values()

    val = stats.factorial(len(leaf_set)) if all_leaves else 1
    for size in group_sizes:
        if size > 1:
            val *= stats.factorial(size)

    return val / (2 ** nmirrors)
def num_redundant_topology(node, gene2species, leaves=None, all_leaves=False):
    """
    Count the 'redundant' topologies of the subtree rooted at 'node'.

    'leaves' limits the subtree (it defaults to node.leaves()).  Leaves
    are hashed with phylo.hash_tree, internal nodes with
    phylo.hash_tree_compose over their children's sorted hashes.  The
    result is

        base * prod(factorial(m) for each leaf-hash multiplicity m > 1)
             / 2 ** (number of internal nodes with all-equal child hashes)

    where base is factorial(len(leaves)) when 'all_leaves' is true and 1
    otherwise.
    """
    if leaves is None:
        leaves = node.leaves()
    leaves = set(leaves)

    hashes = {}
    mirror_nodes = []  # internal nodes whose children hash identically

    def visit(cur):
        if cur in leaves:
            hashes[cur] = phylo.hash_tree(cur, gene2species)
            return

        # children first, so their hashes are available below
        for kid in cur.children:
            visit(kid)

        kid_hashes = util.mget(hashes, cur.children)
        if len(kid_hashes) > 1 and util.equal(*kid_hashes):
            mirror_nodes.append(cur)

        kid_hashes.sort()
        hashes[cur] = phylo.hash_tree_compose(kid_hashes)

    visit(node)

    if all_leaves:
        count = stats.factorial(len(leaves))
    else:
        count = 1

    multiplicities = util.hist_dict(util.mget(hashes, leaves)).values()
    for mult in multiplicities:
        if mult > 1:
            count *= stats.factorial(mult)

    return count / (2 ** len(mirror_nodes))
def prob_coal_counts(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    The terms for k = b..a are generated with a running coefficient 'C',
    sorted by absolute value and added up with kahan_sum().

    If anything goes wrong the arguments are printed and the exception is
    re-raised unchanged.
    """
    try:
        terms = []

        # Coefficient of the first (k = b) term.
        C = stats.prod((b + y) * (a - y) / float(a + y)
                       for y in range(b)) / float(stats.factorial(b))
        terms.append(exp(-b * (b - 1) * t / 2.0 / n) * C)

        for k in range(b + 1, a + 1):
            k1 = k - 1
            # Update the coefficient from the previous term; the division
            # by (b - k) < 0 flips its sign at every step.
            C = (b + k1) * (a - k1) / float(a + k1) / float(b - k) * C
            terms.append(exp(-k * k1 * t / 2.0 / n)
                         * (2 * k - 1) / float(k1 + b) * C)

        # Add the alternating-sign terms from smallest to largest magnitude.
        terms.sort(key=abs)
        return kahan_sum(terms)
    except Exception:
        # Formatted print works as a statement (Python 2) and as a
        # function (Python 3) and yields the same "a b t n" output.
        print("%s %s %s %s" % (a, b, t, n))
        raise
def prob_coal_counts(a, b, t, n):
    """
    Probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'.

    One summand is produced for each k in b..a using a coefficient that is
    updated from the previous summand; the summands are then ordered by
    magnitude and passed to kahan_sum().  On any failure the arguments are
    printed before the exception is re-raised.
    """
    try:
        coeff = stats.prod(
            (b + y) * (a - y) / float(a + y) for y in range(b)
        ) / float(stats.factorial(b))

        # Summand for k = b.
        summands = [exp(-b * (b - 1) * t / 2.0 / n) * coeff]

        # Summands for k = b + 1 .. a, with prev = k - 1.
        for prev in range(b, a):
            k = prev + 1
            coeff = ((b + prev) * (a - prev) / float(a + prev)
                     / float(b - k) * coeff)
            summands.append(
                exp(-k * prev * t / 2.0 / n)
                * (2 * k - 1) / float(prev + b) * coeff)

        return kahan_sum(sorted(summands, key=abs))
    except:
        # Catch-all is deliberate: report the inputs, then propagate.
        print(a, b, t, n)
        raise
def dup_loss_topology_prior(tree, stree, recon, birth, death, maxdoom=20,
                            events=None):
    """
    Returns the log prior of a gene tree topology according to dup-loss model

    tree    -- gene tree
    stree   -- species tree
    recon   -- reconciliation, indexed by gene node, giving a species node
    birth   -- birth parameter passed to calc_doom_table and
               birthdeath.prob_birth_death1
    death   -- death parameter passed to the same two functions
    maxdoom -- the per-branch sum runs over 0..maxdoom extra lineages
    events  -- event label per gene node; computed with phylo.label_events
               when not given

    Side effects: implied speciation nodes are added to 'tree' (and
    registered in 'recon' and 'events') for the duration of the
    computation.  Single-child nodes are removed from 'tree' again before
    the function exits, whether it returns normally or raises.
    """

    def gene2species(gene):
        return recon[tree.nodes[gene]].name

    if events is None:
        events = phylo.label_events(tree, recon)
    leaves = set(tree.leaves())

    try:
        phylo.add_implied_spec_nodes(tree, stree, recon, events)

        _pstree, _snodes, snodelookup = spidir.make_ptree(stree)

        # get doomtable
        doomtable = calc_doom_table(stree, birth, death, maxdoom)

        prod = 0.0
        for node in tree:
            if events[node] != "spec":
                continue

            for schild in recon[node].children:
                nodes2 = [x for x in node.children if recon[x] == schild]
                if nodes2:
                    node2 = nodes2[0]
                    subleaves = get_sub_tree(node2, schild, recon, events)
                    nhist = birthdeath.num_topology_histories(
                        node2, subleaves)
                    s = len(subleaves)
                    thist = (stats.factorial(s) * stats.factorial(s - 1)
                             / 2 ** (s - 1))

                    # "internal" subtree: none of its leaves is an
                    # original leaf of the gene tree
                    internal = len(set(subleaves) & leaves) == 0
                    prod += log(num_redundant_topology(
                        node2, gene2species, subleaves, internal))
                else:
                    nhist = 1.0
                    thist = 1.0
                    s = 0

                # The doom table entry is exponentiated before use, so it
                # presumably stores a log value -- verify against
                # calc_doom_table.  It does not depend on i, so it is
                # computed once per species branch.
                doom = exp(doomtable[snodelookup[schild]])
                t = sum(
                    stats.choose(s + i, i)
                    * birthdeath.prob_birth_death1(
                        s + i, schild.dist, birth, death)
                    * doom ** i
                    for i in range(maxdoom + 1))

                prod += log(nhist) - log(thist) + log(t)

        # correct for renumbering
        nt = num_redundant_topology(tree.root, gene2species)
        prod -= log(nt)

        return prod
    finally:
        # Always undo the temporary tree modification, even if one of the
        # log() calls or helpers above raised.
        treelib.remove_single_children(tree)