Esempio n. 1
0
def dup_loss_topology_prior(tree, stree, recon, birth, death, maxdoom=20, events=None):
    """
    Compute the log prior of a gene tree topology under the dup-loss model.

    tree         -- gene tree.  Implied speciation nodes are added to it for
                    the duration of the computation and single-child nodes
                    are removed again before returning.
    stree        -- species tree
    recon        -- reconciliation, indexed by gene tree node (presumably
                    yielding species tree nodes -- confirm with callers)
    birth, death -- rates handed to the birth-death helpers
    maxdoom      -- largest summation index used for the doom term; also
                    passed to calc_doom_table
    events       -- event label per gene tree node; computed with
                    phylo.label_events when None

    NOTE(review): recon and events are passed to
    phylo.add_implied_spec_nodes and may be updated by it -- confirm.
    """

    def gene2species(gene):
        gene_node = tree.nodes[gene]
        return recon[gene_node].name

    if events is None:
        events = phylo.label_events(tree, recon)

    # remember the leaves before the tree is modified
    original_leaves = set(tree.leaves())
    phylo.add_implied_spec_nodes(tree, stree, recon, events)

    _pstree, _snodes, species_index = spidir.make_ptree(stree)

    # doom table (values are exponentiated before use below)
    doomtable = calc_doom_table(stree, birth, death, maxdoom)

    logp = 0.0
    for node in tree:
        if events[node] != "spec":
            continue

        for schild in recon[node].children:
            matching = [child for child in node.children if recon[child] == schild]

            if not matching:
                # no gene child reconciles to this species child
                nhist = 1.0
                thist = 1.0
                s = 0
            else:
                top = matching[0]
                subleaves = get_sub_tree(top, schild, recon, events)
                nhist = birthdeath.num_topology_histories(top, subleaves)
                s = len(subleaves)
                thist = stats.factorial(s) * stats.factorial(s - 1) / 2 ** (s - 1)

                # True when none of the subtree leaves is a leaf of the
                # original gene tree
                all_internal = len(set(subleaves) & original_leaves) == 0
                logp += log(
                    num_redundant_topology(top, gene2species, subleaves, all_internal)
                )

            t = sum(
                stats.choose(s + ndoom, ndoom)
                * birthdeath.prob_birth_death1(s + ndoom, schild.dist, birth, death)
                * exp(doomtable[species_index[schild]]) ** ndoom
                for ndoom in range(maxdoom + 1)
            )

            logp += log(nhist) - log(thist) + log(t)

    # correct for renumbering
    logp -= log(num_redundant_topology(tree.root, gene2species))

    # drop the single-child nodes again
    treelib.remove_single_children(tree)

    return logp
Esempio n. 2
0
def gegenbauer3(n, a, z):
    """
    Evaluate the explicit-sum form of the Gegenbauer polynomial C_n^(a)(z):

        sum_{k=0}^{floor(n/2)} (-1)^k * gamma(n - k + a)
            / (gamma(a) * k! * (n - 2k)!) * (2z)^(n - 2k)

    n -- polynomial degree
    a -- Gegenbauer parameter (gamma(a) appears in the denominator)
    z -- point at which the polynomial is evaluated
    """

    # loop-invariant factors
    gamma_a = stats.gamma(a)
    two_z = 2 * z

    tot = 0
    # range() instead of xrange() so the function also runs on Python 3
    for k in range(int(n // 2) + 1):
        tot += ((-1)**k * stats.gamma(n - k + a) / (
                gamma_a * stats.factorial(k) * stats.factorial(n - 2*k))
                * (two_z ** (n - 2*k)))
    return tot
Esempio n. 3
0
def prob_coal_counts(u, v, t, n):
    """
    The probability of going from 'u' lineages to 'v' lineages in time 't'
    with population size 'n'

    Evaluated as the sum over k = v..u of

        exp(-k(k-1)T/2) * (2k-1) * (-1)^(k-v) / (v! (k-v)! (k+v-1))
            * prod_{y=0}^{k-1} (v+y)(u-y)/(u+y)

    with T = t / n.
    """

    # force float division: with integer 't' and 'n' Python 2 would
    # truncate the ratio (usually to 0)
    T = t / float(n)

    total = 0.0
    for k in range(v, u + 1):
        # float(u + y) keeps each factor of the product from being
        # truncated by integer division
        term = exp(-k*(k-1)*T/2.0) * (2*k-1)*(-1)**(k-v) / stats.factorial(v) / \
            stats.factorial(k-v) / (k+v-1) * \
            stats.prod((v+y)*(u-y)/float(u+y) for y in range(k))
        total += term
    return total
Esempio n. 4
0
def prob_coal_counts_slow(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    Every term of the sum over k = b..a is evaluated from scratch, which is
    slower but more direct.  Good for testing against.
    """

    def term(k):
        # prod_{y=0}^{k-1} (b+y)(a-y)/(a+y)
        ratio_product = stats.prod(
            (b + y) * (a - y) / float(a + y) for y in range(k))
        return (exp(-k * (k - 1) * t / 2.0 / n)
                * float(2 * k - 1) * (-1) ** (k - b)
                / stats.factorial(b)
                / stats.factorial(k - b)
                / (k + b - 1)
                * ratio_product)

    total = 0.0
    for k in range(b, a + 1):
        total += term(k)
    return total
Esempio n. 5
0
def legendre_poly(n):
    r"""
    Build the Legendre polynomial of degree 'n' from the formula

        \frac{1}{2^n n!} d^n/dx^n [(x^2 - 1)^n]

    The expression is assembled as nested tuples, differentiated 'n' times
    with respect to 'x' by derivate() and the result is passed through
    simplify().
    """

    # 1 / (2^n n!)
    coeff = ('scalar', 1.0 / (2 ** n * stats.factorial(n)))

    # (x^2 - 1)^n
    x_squared = ('power', ('var', 'x'), ('scalar', 2))
    base = ('add', x_squared, ('scalar', -1))
    raised = ('power', base, ('scalar', n))

    return simplify(('mult', coeff, derivate(raised, 'x', n)))
Esempio n. 6
0
def prob_coal_counts_slow(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    Evaluated as the sum over k = b..a of

        exp(-k(k-1)t/(2n)) * (2k-1) * (-1)^(k-b) / (b! (k-b)! (k+b-1))
            * prod_{y=0}^{k-1} (b+y)(a-y)/(a+y)

    Implemented more directly, but slower.  Good for testing against.
    """

    s = 0.0
    # range() rather than xrange() so the function also runs on Python 3
    for k in range(b, a + 1):
        i = (
            exp(-k * (k - 1) * t / 2.0 / n)
            * float(2 * k - 1)
            * (-1) ** (k - b)
            / stats.factorial(b)
            / stats.factorial(k - b)
            / (k + b - 1)
            * stats.prod((b + y) * (a - y) / float(a + y) for y in range(k))
        )
        s += i
    return s
Esempio n. 7
0
def num_redundant_topology(node, gene2species, leaves=None, all_leaves=False):
    """
    Returns the number of 'redundant' topologies

    node         -- root of the subtree to examine
    gene2species -- passed through to phylo.hash_tree when hashing leaves
    leaves       -- nodes at which the walk stops; defaults to node.leaves()
    all_leaves   -- if True the count starts from factorial(len(leaves));
                    otherwise from the product of factorial(size) over the
                    groups of leaves that share a hash

    The starting count is divided by 2 for every internal node whose
    children (more than one) all have equal hashes.
    """

    leaf_set = set(node.leaves() if leaves is None else leaves)
    colors = {}
    # one-element list so the nested function can update the counter
    mirror_count = [0]

    def assign_colors(current):
        if current in leaf_set:
            colors[current] = phylo.hash_tree(current, gene2species)
            return

        # color the children first
        for child in current.children:
            assign_colors(child)

        child_hashes = util.mget(colors, current.children)
        if len(child_hashes) > 1 and util.equal(*child_hashes):
            mirror_count[0] += 1

        # sort so the composed hash ignores child order
        child_hashes.sort()
        colors[current] = phylo.hash_tree_compose(child_hashes)

    assign_colors(node)

    group_sizes = util.hist_dict(util.mget(colors, leaf_set)).values()

    if all_leaves:
        val = stats.factorial(len(leaf_set))
    else:
        val = 1
        for size in group_sizes:
            if size > 1:
                val *= stats.factorial(size)

    return val / (2 ** mirror_count[0])
Esempio n. 8
0
def num_redundant_topology(node, gene2species, leaves=None, all_leaves=False):
    """
    Returns the number of 'redundant' topologies

    Every node from 'node' down to 'leaves' (default: node.leaves()) gets a
    hash: leaves via phylo.hash_tree, internal nodes via
    phylo.hash_tree_compose of their sorted child hashes.  The result is

        factorial(len(leaves))                       if all_leaves
        product of factorial(size) over leaf groups  otherwise
                                    sharing a hash

    divided by 2**m, where m counts the internal nodes whose children
    (more than one) all have equal hashes.
    """

    if leaves is None:
        leaves = node.leaves()
    stop_nodes = set(leaves)
    hashes = {}

    def color_subtree(subroot):
        """Hash the subtree under 'subroot'; return its mirror-node count."""
        if subroot in stop_nodes:
            hashes[subroot] = phylo.hash_tree(subroot, gene2species)
            return 0

        mirrors = 0
        for child in subroot.children:
            mirrors += color_subtree(child)

        below = util.mget(hashes, subroot.children)
        if len(below) > 1 and util.equal(*below):
            mirrors += 1

        below.sort()
        hashes[subroot] = phylo.hash_tree_compose(below)
        return mirrors

    nmirrors = color_subtree(node)

    colorsizes = util.hist_dict(util.mget(hashes, stop_nodes)).values()

    if all_leaves:
        val = stats.factorial(len(stop_nodes))
    else:
        val = 1
        for count in colorsizes:
            if count <= 1:
                continue
            val *= stats.factorial(count)

    return val / (2**nmirrors)
Esempio n. 9
0
def prob_coal_counts(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    The terms for k = b..a are built incrementally: the coefficient 'C' of
    term k is obtained from the coefficient of term k-1 instead of being
    recomputed from scratch.  On failure the arguments are printed before
    the exception is re-raised.
    """

    try:
        terms = []

        # coefficient of the first term (k = b)
        C = stats.prod((b + y) * (a - y) / float(a + y) for y in range(b)) / float(stats.factorial(b))
        terms.append(exp(-b * (b - 1) * t / 2.0 / n) * C)

        # range() rather than xrange() so the function also runs on Python 3
        for k in range(b + 1, a + 1):
            k1 = k - 1
            # dividing by (b - k) < 0 flips the sign at every step
            C = (b + k1) * (a - k1) / float(a + k1) / float(b - k) * C
            terms.append(exp(-k * k1 * t / 2.0 / n) * (2 * k - 1) / float(k1 + b) * C)

        # add the terms in order of increasing magnitude
        terms.sort(key=abs)
        return kahan_sum(terms)
    except Exception:
        # same output as the old print statement, valid on Python 2 and 3
        print("%s %s %s %s" % (a, b, t, n))
        raise
Esempio n. 10
0
def prob_coal_counts(a, b, t, n):
    """
    The probability of going from 'a' lineages to 'b' lineages in time 't'
    with population size 'n'

    Each term's coefficient is derived from the previous one; the terms are
    then ordered by absolute value and added with kahan_sum().  If anything
    fails, the arguments are printed and the exception propagates.
    """

    try:
        # coefficient and value of the k = b term
        coeff = stats.prod((b+y)*(a-y)/float(a+y) for y in range(b)) \
            / float(stats.factorial(b))
        terms = [exp(-b * (b - 1) * t / 2.0 / n) * coeff]

        for k in range(b + 1, a + 1):
            prev = k - 1
            # step the coefficient from term k-1 to term k
            coeff = (b + prev) * (a - prev) / float(a + prev) / float(b - k) * coeff
            decay = exp(-k * prev * t / 2.0 / n)
            terms.append(decay * (2 * k - 1) / float(prev + b) * coeff)

        return kahan_sum(sorted(terms, key=abs))
    except BaseException:
        print(a, b, t, n)
        raise
Esempio n. 11
0
def dup_loss_topology_prior(tree,
                            stree,
                            recon,
                            birth,
                            death,
                            maxdoom=20,
                            events=None):
    """
    Returns the log prior of a gene tree topology according to dup-loss model

    tree         -- gene tree.  Implied speciation nodes are added to it
                    while the prior is computed; single-child nodes are
                    removed again before returning, also when the
                    computation raises.
    stree        -- species tree
    recon        -- reconciliation, indexed by gene tree node
    birth, death -- rates handed to the birth-death helpers
    maxdoom      -- largest summation index used for the doom term; also
                    passed to calc_doom_table
    events       -- event label per gene tree node; computed with
                    phylo.label_events when None

    NOTE(review): recon and events are passed to
    phylo.add_implied_spec_nodes and may keep entries for the temporary
    nodes afterwards -- confirm whether callers rely on that.
    """
    def gene2species(gene):
        return recon[tree.nodes[gene]].name

    if events is None:
        events = phylo.label_events(tree, recon)
    leaves = set(tree.leaves())
    phylo.add_implied_spec_nodes(tree, stree, recon, events)

    try:
        # only the species node lookup is needed here
        _, _, snodelookup = spidir.make_ptree(stree)

        # get doomtable
        doomtable = calc_doom_table(stree, birth, death, maxdoom)

        prod = 0.0
        for node in tree:
            if events[node] != "spec":
                continue

            for schild in recon[node].children:
                nodes2 = [x for x in node.children if recon[x] == schild]
                if nodes2:
                    node2 = nodes2[0]
                    subleaves = get_sub_tree(node2, schild, recon, events)
                    nhist = birthdeath.num_topology_histories(node2,
                                                              subleaves)
                    s = len(subleaves)
                    thist = (stats.factorial(s) * stats.factorial(s - 1) /
                             2**(s - 1))

                    # True for an internal subtree, i.e. one that contains
                    # none of the original gene tree leaves
                    internal = not (set(subleaves) & leaves)
                    prod += log(
                        num_redundant_topology(node2, gene2species,
                                               subleaves, internal))
                else:
                    nhist = 1.0
                    thist = 1.0
                    s = 0

                # the same for every summation index, so compute it once
                pdoom = exp(doomtable[snodelookup[schild]])
                t = sum(
                    stats.choose(s + i, i) * birthdeath.prob_birth_death1(
                        s + i, schild.dist, birth, death) * pdoom**i
                    for i in range(maxdoom + 1))

                prod += log(nhist) - log(thist) + log(t)

        # correct for renumbering
        nt = num_redundant_topology(tree.root, gene2species)
        prod -= log(nt)
    finally:
        # always undo the temporary nodes, even if the computation failed
        treelib.remove_single_children(tree)

    return prod