def compute_cost(self, gtree):
    """Return the duplication-loss cost of a gene tree.

    The gene tree is reconciled to ``self.stree`` using
    ``self.gene2species``.  The number of duplications is weighted by
    ``self.dupcost`` and the number of losses by ``self.losscost``; the
    two weighted counts are summed.

    gtree -- gene tree to score

    Returns 0 without reconciling when both weights are zero.
    """
    # neither term can contribute, so skip the reconciliation entirely
    # (same shortcut as _compute_duplosscost)
    if self.dupcost == 0 and self.losscost == 0:
        return 0

    recon = phylo.reconcile(gtree, self.stree, self.gene2species)
    events = phylo.label_events(gtree, recon)

    cost = 0
    if self.dupcost != 0:
        cost += phylo.count_dup(gtree, events) * self.dupcost
    if self.losscost != 0:
        cost += phylo.count_loss(gtree, self.stree, recon) * self.losscost
    return cost
Example #2
0
    def eval_proposal(self, proposal):
        """Score a proposal and attach the breakdown to ``proposal.data``.

        The total is the sum of the duplication, loss and coalescence
        costs.  A component whose weight (``self.dupcost``,
        ``self.losscost``, ``self.coalcost``) is zero is not counted: its
        cost is 0 and its count is recorded as None.  If the locus events
        and the daughters do not match, every component is ``util.INF``
        and every count is None.

        proposal -- proposal providing locus_tree, locus_recon,
                    locus_events, coal_recon and daughters

        Returns the total cost.
        """
        # a count stays None for every component that is not evaluated
        ndup = nloss = ncoal = None

        if phyloDLC.assert_daughters(proposal.locus_events, proposal.daughters):
            dupcost = losscost = coalcost = 0

            # duplications
            if self.dupcost != 0:
                ndup = phylo.count_dup(proposal.locus_tree, proposal.locus_events)
                dupcost = ndup * self.dupcost

            # losses
            if self.losscost != 0:
                nloss = phylo.count_loss(proposal.locus_tree, self.stree, proposal.locus_recon)
                losscost = nloss * self.losscost

            # the bounded coalescent must hold before coalescence is scored
            # (expected to always be true given how daughters are proposed)
            phyloDLC.assert_bounded_coal(self.coal_tree, proposal.coal_recon,
                                         proposal.locus_tree, proposal.daughters)

            if self.coalcost != 0:
                # implied speciation nodes are added only now, AFTER dups and
                # losses have been counted, because they affect loss inference
                if self.implied:
                    added = phylo.add_implied_spec_nodes(proposal.locus_tree, self.stree,
                                                         proposal.locus_recon, proposal.locus_events)

                ncoal = phyloDLC.count_coal(self.coal_tree, proposal.coal_recon, proposal.locus_tree)
                coalcost = ncoal * self.coalcost

                # restore the locus tree to its original form
                if self.implied:
                    phylo.remove_implied_spec_nodes(proposal.locus_tree, added,
                                                    proposal.locus_recon, proposal.locus_events)
        else:
            # locus events (duplications) and daughters do not match
            dupcost = losscost = coalcost = util.INF

        cost = dupcost + losscost + coalcost

        # logging info
        proposal.data = {
            "ndup": ndup,
            "nloss": nloss,
            "ncoal": ncoal,
            "cost": cost,
        }

        return cost
Example #3
0
 def _compute_duplosscost(self, ltree):
     """Return the dup/loss cost of reconciling a locus tree to the species tree.

     ltree -- locus tree to score

     The tree is reconciled to ``self.stree`` only when at least one of
     ``self.dupcost`` / ``self.losscost`` is positive; otherwise 0 is
     returned immediately.
     """
     # no positive weight: nothing to reconcile
     if not (self.dupcost > 0 or self.losscost > 0):
         return 0

     locus_recon = phylo.reconcile(ltree, self.stree, self.gene2species)
     locus_events = phylo.label_events(ltree, locus_recon)

     total = 0
     if self.dupcost != 0:
         total += phylo.count_dup(ltree, locus_events) * self.dupcost
     if self.losscost != 0:
         total += phylo.count_loss(ltree, self.stree, locus_recon) * self.losscost
     return total
Example #4
0
    def _compute_coalcost(self, gtree, ltree):
        """Return the deep coalescence cost of a gene tree within a locus tree.

        gtree -- coalescent tree (gene tree)
        ltree -- locus tree the gene tree is reconciled to

        Note: uses Zhang (RECOMB 2000) result that C = L - 2*D, i.e. the
        loss count minus twice the duplication count of the reconciliation.
        Returns 0 without reconciling unless ``self.coalcost`` is positive.
        """
        if not self.coalcost > 0:
            return 0

        coal_recon = phylo.reconcile(gtree, ltree)
        coal_events = phylo.label_events(gtree, coal_recon)

        nloss = phylo.count_loss(gtree, ltree, coal_recon)
        ndup = phylo.count_dup(gtree, coal_events)
        return (nloss - 2*ndup) * self.coalcost
Example #5
0
    def prescreen(self, tree):
        """Return the weighted duplication-loss cost of ``tree``.

        The tree is reconciled to ``self.stree`` using
        ``self.gene2species``.  A term whose weight (``self.dupcost`` or
        ``self.losscost``) is zero is not counted and contributes 0.

        tree -- tree to score
        """
        recon = phylo.reconcile(tree, self.stree, self.gene2species)
        events = phylo.label_events(tree, recon)

        # duplication term
        dup_part = 0
        if self.dupcost != 0:
            dup_part = phylo.count_dup(tree, events) * self.dupcost

        # loss term
        loss_part = 0
        if self.losscost != 0:
            loss_part = phylo.count_loss(tree, self.stree, recon) * self.losscost

        return dup_part + loss_part
Example #6
0
    def prescreen(self, tree):
        """Compute the duplication-loss cost of ``tree``.

        tree -- tree to reconcile to ``self.stree`` via ``self.gene2species``

        Returns ``ndup * self.dupcost + nloss * self.losscost``; a count is
        skipped (and its term is 0) when the matching weight is zero.
        """
        recon = phylo.reconcile(tree, self.stree, self.gene2species)
        events = phylo.label_events(tree, recon)

        dupcost = (0 if self.dupcost == 0
                   else phylo.count_dup(tree, events) * self.dupcost)
        losscost = (0 if self.losscost == 0
                    else phylo.count_loss(tree, self.stree, recon) * self.losscost)

        return dupcost + losscost
Example #7
0
    def eval_proposal(self, proposal):
        """Compute the cost of a proposal and log its components.

        proposal -- proposal providing locus_tree, locus_recon,
                    locus_events, coal_recon and daughters

        The cost is dupcost + losscost + coalcost.  Each component is the
        event count times its weight on ``self``; when a weight is zero the
        count is skipped and logged as None.  When locus events and
        daughters disagree, all three components are ``util.INF``.

        A dict with keys "ndup", "nloss", "ncoal" and "cost" is stored in
        ``proposal.data``.  Returns the total cost.
        """
        daughters_ok = phyloDLC.assert_daughters(proposal.locus_events,
                                                 proposal.daughters)

        if not daughters_ok:
            # locus events (duplications) and daughters must match
            ndup = nloss = ncoal = None
            dupcost = losscost = coalcost = util.INF
        else:
            # dup cost
            ndup, dupcost = None, 0
            if self.dupcost != 0:
                ndup = phylo.count_dup(proposal.locus_tree,
                                       proposal.locus_events)
                dupcost = ndup * self.dupcost

            # loss cost
            nloss, losscost = None, 0
            if self.losscost != 0:
                nloss = phylo.count_loss(proposal.locus_tree, self.stree,
                                         proposal.locus_recon)
                losscost = nloss * self.losscost

            # coal cost: check the bounded coalescent first (should always
            # hold given how daughters are proposed)
            phyloDLC.assert_bounded_coal(self.coal_tree, proposal.coal_recon,
                                         proposal.locus_tree,
                                         proposal.daughters)
            ncoal, coalcost = None, 0
            if self.coalcost != 0:
                # implied speciation nodes go in AFTER dups and losses are
                # counted, since they affect loss inference
                if self.implied:
                    added = phylo.add_implied_spec_nodes(
                        proposal.locus_tree, self.stree, proposal.locus_recon,
                        proposal.locus_events)

                ncoal = phyloDLC.count_coal(self.coal_tree,
                                            proposal.coal_recon,
                                            proposal.locus_tree)
                coalcost = ncoal * self.coalcost

                # take the implied nodes back out again
                if self.implied:
                    phylo.remove_implied_spec_nodes(proposal.locus_tree, added,
                                                    proposal.locus_recon,
                                                    proposal.locus_events)

        # total cost
        cost = dupcost + losscost + coalcost

        # logging info
        proposal.data = {"ndup": ndup, "nloss": nloss,
                         "ncoal": ncoal, "cost": cost}

        return cost
Example #8
0
def prob_dlcoal_recon_topology(coal_tree, coal_recon,
                               locus_tree, locus_recon, locus_events,
                               daughters,
                               stree, n, duprate, lossrate,
                               pretime=None, premean=None,
                               maxdoom=20, nsamples=100,
                               add_spec=True):
    """
    Probability of a reconciled gene tree in the DLCoal model.

    coal_tree    -- coalescent tree
    coal_recon   -- reconciliation of coalescent tree to locus tree
    locus_tree   -- locus tree (has dup-loss)
    locus_recon  -- reconciliation of locus tree to species tree
    locus_events -- events dict for locus tree
    daughters    -- daughter nodes, passed to prob_coal_recon_topology
    stree        -- species tree
    n            -- population sizes in species tree
    duprate      -- duplication rate
    lossrate     -- loss rate
    maxdoom      -- passed through to spidir.calc_birth_death_prior
    nsamples     -- number of duplication-time samples to average over
    add_spec     -- if true, phylo.add_implied_spec_nodes is called on the
                    locus tree first (the nodes are not removed afterwards)

    You must also specify one of the following
    pretime      -- starting time before species tree
    premean      -- mean starting time before species tree

    Note: the locus tree must contain implied speciation nodes; pass
    add_spec=False only if they are already present.

    Each sample overwrites the locus tree branch lengths through
    treelib.set_dists_from_timestamps.  The per-sample coalescent value is
    printed.  The result is the sum of the dup-loss term, the daughters
    term and util.safelog of the sample mean (presumably a log
    probability).
    """

    # duplications are counted before any implied speciation node is added
    ndups = phylo.count_dup(locus_tree, locus_events)

    if add_spec:
        phylo.add_implied_spec_nodes(locus_tree, stree,
                                     locus_recon, locus_events)

    # each locus tree node takes the popsize of the species it maps to
    species_popsizes = coal.init_popsizes(stree, n)
    popsizes = dict(
        (node.name, species_popsizes[locus_recon[node].name])
        for node in locus_tree)

    # dup-loss term (timed)
    util.tic("top")
    duploss_term = spidir.calc_birth_death_prior(locus_tree, stree,
                                                 locus_recon,
                                                 duprate, lossrate,
                                                 maxdoom=maxdoom)
    util.toc()

    # daughters term: a factor of one half per duplication
    daughters_term = ndups * log(.5)

    # integrate over duplication times by sampling
    total = 0.0
    for _ in xrange(nsamples):
        sampled_times = spidir.topology_prior.sample_dup_times(
            locus_tree, stree, locus_recon, duprate, lossrate, pretime,
            premean,
            events=locus_events)
        assert len(sampled_times) == len(locus_tree.nodes), (
            len(sampled_times), len(locus_tree.nodes))
        treelib.set_dists_from_timestamps(locus_tree, sampled_times)

        # coalescent topology term for this sample
        coal_prob = prob_coal_recon_topology(coal_tree, coal_recon,
                                             locus_tree, popsizes, daughters)

        total += exp(coal_prob)
        print(coal_prob)

    return duploss_term + daughters_term + util.safelog(total / nsamples)
Example #9
0
def prob_dlcoal_recon_topology(coal_tree, coal_recon,
                               locus_tree, locus_recon, locus_events,
                               daughters,
                               stree, n, duprate, lossrate,
                               pretime=None, premean=None,
                               nsamples=100,
                               add_spec=True, info=None):
    """
    Probability of a reconciled gene tree in the DLCoal model.

    coal_tree    -- coalescent tree
    coal_recon   -- reconciliation of coalescent tree to locus tree
    locus_tree   -- locus tree (has dup-loss)
    locus_recon  -- reconciliation of locus tree to species tree
    locus_events -- events dict for locus tree
    daughters    -- daughter nodes, passed to the sampling routine
    stree        -- species tree
    n            -- population sizes in species tree
    duprate      -- duplication rate
    lossrate     -- loss rate
    nsamples     -- number of samples; log(nsamples) is subtracted from
                    the sampled term
    add_spec     -- accepted but not used by this function
    info         -- optional mapping; when given, the keys "duploss_prob",
                    "daughters_prob", "coal_prob" and "prob" are written

    You must also specify one of the following
    pretime      -- starting time before species tree
    premean      -- mean starting time before species tree

    Returns duploss_prob + daughters_prob + coal_prob - log(nsamples).
    """

    # each locus tree node takes the popsize of the species it maps to
    species_popsizes = coal.init_popsizes(stree, n)
    popsizes = dict(
        (node.name, species_popsizes[locus_recon[node].name])
        for node in locus_tree)

    # dup-loss term
    duploss_term = duploss.prob_dup_loss(
        locus_tree, stree, locus_recon, locus_events,
        duprate, lossrate)

    # daughters term: a factor of one half per duplication
    ndups = phylo.count_dup(locus_tree, locus_events)
    daughters_term = ndups * log(.5)

    # integrate over duplication times using sampling
    species_times = treelib.get_tree_timestamps(stree)
    sampled_term = prob_locus_coal_recon_topology_samples(
        coal_tree, coal_recon,
        locus_tree, locus_recon, locus_events, popsizes,
        stree, species_times,
        daughters, duprate, lossrate, nsamples,
        pretime, premean)

    if info is None:
        return duploss_term + daughters_term + sampled_term - log(nsamples)

    # logging info
    info["duploss_prob"] = duploss_term
    info["daughters_prob"] = daughters_term
    info["coal_prob"] = sampled_term
    total = duploss_term + daughters_term + sampled_term - log(nsamples)
    info["prob"] = total

    return total
Example #10
0
def prob_locus_gene_species_alignment_recon(alnfile,
                                            partfile,
                                            stree,
                                            popsizes,
                                            duprate,
                                            lossrate,
                                            subrate,
                                            beta,
                                            pretime,
                                            premean,
                                            coal_tree,
                                            coal_recon,
                                            nsamples_coal,
                                            locus_tree,
                                            locus_recon,
                                            nsamples_locus,
                                            daughters,
                                            rates,
                                            freqs,
                                            alphas,
                                            threads=1,
                                            seed=ALIGNMENT_SEED,
                                            eps=0.1,
                                            info=None):
    """
    (Log) joint probability of locus_tree, locus_recon, coal_tree,
    coal_recon, daughters and alignment.  The three terms below are summed,
    i.e. each is treated as a log probability:

    log P(T^G, T^L, R^G, R^L, delta^L, A | S, theta) =
        log P(delta^L | T^L, R^L, S)
      + log P(T^L, R^L | S, theta^S)
      + log int int P(t^L | T^L, R^L, S, theta)
                  * P(T^G, R^G, t^G | t^L, T^L, daughters, R^L, theta)
                  * P(A | T^G, t^G) dt^L dt^G

    alnfile           -- alignment file
    partfile          -- partition file
    stree             -- species tree
    popsizes          -- population sizes in species tree
    duprate           -- duplication rate
    lossrate          -- loss rate
    subrate           -- substitution rate
    beta              -- regularization parameter
    pretime           -- starting time before species tree
    premean           -- mean starting time before species tree

    coal_tree         -- coalescent tree
    coal_recon        -- reconciliation of coalescent tree to locus tree
    nsamples_coal     -- number of times to sample coal times t^G
    locus_tree        -- locus tree (has dup-loss)
    locus_recon       -- reconciliation of locus tree to species tree
    nsamples_locus    -- number of times to sample the locus tree times t^L
    daughters         -- daughter nodes

    rates, freqs, alphas  -- optimization parameters

    threads, seed, eps, info -- forwarded unchanged to
                                prob_gene_species_alignment_recon

    Note: Adapted from dlcoal.prob_dlcoal_recon_topology(...) [in __init__.py]
    """

    # duploss probability: P(T^L, R^L | S, theta)
    locus_events = phylo.label_events(locus_tree, locus_recon)
    dl_prob = duploss.prob_dup_loss(locus_tree, stree, locus_recon,
                                    locus_events, duprate, lossrate)

    # daughters probability: P(daughters | T^L, R^L, S)
    # (a factor of one half per duplication)
    dups = phylo.count_dup(locus_tree, locus_events)
    daughter_prob = dups * log(.5)

    # double integral; the caller's threads/seed/eps/info are passed on
    # rather than being replaced by the defaults
    double_integral = prob_gene_species_alignment_recon(alnfile,
                                                        partfile,
                                                        stree,
                                                        popsizes,
                                                        duprate,
                                                        lossrate,
                                                        subrate,
                                                        beta,
                                                        pretime,
                                                        premean,
                                                        coal_tree,
                                                        coal_recon,
                                                        nsamples_coal,
                                                        locus_tree,
                                                        locus_recon,
                                                        nsamples_locus,
                                                        daughters,
                                                        rates,
                                                        freqs,
                                                        alphas,
                                                        threads=threads,
                                                        seed=seed,
                                                        eps=eps,
                                                        info=info)

    return dl_prob + daughter_prob + double_integral