Exemple #1
0
def sim_step_indiv(N, time, dr=.0012, lr=.0011, Fd=1e-5, Fl=1e-5, pi=1e0):
    """
    Simulate duplication/loss events for one lineage over a span of time.

    Events are drawn as a Poisson process: waiting times come from
    stats.exponentialvariate with the combined dup+loss rate, and each
    event is a duplication with probability dup_rate / total_rate.
    Inputs are assumed to be sanitized by the caller.

    N    -- population size (scales the per-individual rates)
    time -- length of the simulated period, in years
    dr   -- duplication rate in events/myr/indiv
    lr   -- loss rate in events/myr/indiv
    Fd   -- starting frequency handed to the recursive call made for
            each duplication
    Fl   -- frequency subtracted from this lineage per loss
    pi   -- starting frequency of this lineage

    Returns flatten(...) applied to a list whose first entry is this
    lineage's final frequency, followed by the results of the recursive
    calls made for each duplication (in the order they occurred).
    """
    # convert per-myr rates to per-year, then scale by frequency and N
    dup_rate = (dr / 1e6) * pi * N
    loss_rate = (lr / 1e6) * pi * N
    total_rate = dup_rate + loss_rate

    own_freq = pi
    daughters = []      # results of recursive runs, one per duplication
    loss_count = 0
    remaining = time

    while remaining > 0.0:
        wait = stats.exponentialvariate(total_rate)  # time to next event
        if wait > remaining:
            # next event falls beyond the end of the period
            break
        remaining -= wait
        if random.random() <= dup_rate / total_rate:
            # duplication: simulate the new copy over the time left
            daughters.append(
                sim_step_indiv(N, remaining, dr, lr, Fd, Fl, Fd))
            debugprint("  Duplication")
        else:
            # losses are tallied and applied together after the loop
            loss_count += 1

    if loss_count > 0:
        # approximation: all losses are applied at once, floored at zero
        own_freq = max(own_freq - loss_count * Fl, 0.0)
        debugprint("  Losses: " + str(loss_count))

    # resample this lineage's frequency over the whole period
    own_freq = coal.sample_freq_CDF(own_freq, N, time)

    return flatten([own_freq] + daughters)
Exemple #2
0
 def sim_walk(node, p, walk_time=0.0, time_until_force=forcetime):
     """
     Walk down the branch above `node` with allele frequency `p`,
     editing the enclosing `stree` in place.

     node             -- tree node whose branch is being walked
     p                -- current frequency on the branch
     walk_time        -- distance already walked along this branch
     time_until_force -- time left before a forced frequency resample

     One step is taken per call and the walk continues by recursion:
       * p <= 0: the subtree at `node` is removed (extinction)
       * no D/L event before the next deadline: either a forced
         frequency resample, or the branch ends and the walk recurses
         through node.recurse
       * duplication: a new node is spliced in above `node` with a copy
         of node's subtree; both copies are then walked
       * loss: the frequency drops by `freqloss` and the walk continues
     """
     branch_label = str(node.name)
     debugprint("".join([" Sim on branch", branch_label,
                         " with frequency ", str(p),
                         " and walk time ", str(walk_time)]))

     # extinction: drop the subtree and tidy up what is left above it
     if p <= 0.0:
         debugprint("  Extinction on branch " + branch_label)
         parent = node.parent
         stree.remove_tree(node)
         remove_duds(parent)
         return
     if p >= 1.0:  # sanity check
         p = 1.0

     dup_rate = dr * p
     loss_rate = lr * p
     total_rate = dup_rate + loss_rate
     wait = stats.exponentialvariate(total_rate)
     branch_left = node.dist - walk_time

     if wait >= min(time_until_force, branch_left):
         # no D/L event before the next deadline
         if time_until_force < branch_left:
             # forced resample; forcetime is scaled from myr to years
             drifted = coal.sample_freq_CDF(p, popsize, forcetime * 1e6)
             debugprint("   Forced new frequency: " + str(drifted))
             # time_until_force is left to default, i.e. it resets
             return sim_walk(node, drifted,
                             walk_time=walk_time + time_until_force)

         # end of branch; remaining time is scaled from myr to years
         drifted = coal.sample_freq_CDF(p, popsize, branch_left * 1e6)
         node.data['freq'] = drifted  # frequency at the end of the branch
         debugprint("   Completed branch; new frequency: " + str(drifted))
         return node.recurse(sim_walk, drifted)

     # a D/L event happens first (no frequency resample for these events)
     elapsed = walk_time + wait
     force_left = time_until_force - wait

     if not event_is_dup(dup_rate, total_rate):
         # loss event
         after_loss = p - freqloss
         debugprint("".join(["   Loss occurred at walk time ", str(elapsed),
                             " yielding new frequency ", str(after_loss)]))
         return sim_walk(node, after_loss, walk_time=elapsed,
                         time_until_force=force_left)

     # duplication event: splice a new node in above `node`
     dup_node = treelib.TreeNode(stree.new_name())
     stree.add_child(node.parent, dup_node)
     dup_copy = treelib.subtree(stree, node)  # copy taken before re-hanging
     stree.remove(node)
     stree.add_child(dup_node, node)
     stree.add_tree(dup_node, dup_copy)
     dup_node.dist = elapsed
     node.dist -= elapsed             # shorten original by the walked part
     dup_copy.root.dist = node.dist   # the copy gets the same remainder
     dup_node.data['freq'] = p        # frequency at the duplication
     debugprint("   Duplication occurred at walk time " + str(elapsed))
     # walk the rest of the original branch, then the duplicate
     sim_walk(node, p, time_until_force=force_left)
     sim_walk(dup_copy.root, freqdup, time_until_force=force_left)
     return
Exemple #3
0
    def sim_walk(gtree, snode, gnode, p, s_walk_time=0.0, g_walk_time=0.0,
                 time_until_force=forcetime, eventlog=None):
        """
        Grow `gtree` below `gnode` by walking along species branch `snode`.

        gtree            -- tree being built; new nodes are added in place
        snode            -- species-tree node whose branch is being walked
        gnode            -- node of gtree that new nodes are attached under
        p                -- current frequency on the branch
        s_walk_time      -- distance already walked along the snode branch
        g_walk_time      -- distance walked since gnode was created
        time_until_force -- time left before a forced frequency update
        eventlog         -- events seen so far on the current gtree branch;
                            a fresh list is used when omitted

        Each eventlog entry has the form
          (time_on_branch, event_type, frequency, species_node)
        where
          time_on_branch is the walk time along the gtree branch,
          event_type is one of 'extinction', 'frequency', 'speciation',
            'duplication', 'loss', 'gene', 'daughter' or 'root'
            ('root' is never added by sim_walk itself),
          frequency is the branch frequency at the event time,
          species_node is the name of the species branch the event is on.

        When a new gtree node is created, the log collected so far is
        stored in its data['log'].  Returns None.
        """
        # Use a fresh list per call.  A list literal as the default would
        # be shared by every call that omits eventlog, leaking entries
        # between walks and between the nodes that store the log.
        if eventlog is None:
            eventlog = []

        if p <= 0.0:
            ## EXTINCTION EVENT
            # gnode is the parent of the extinct node created here
            new_gnode = treelib.TreeNode(gtree.new_name())
            new_gnode.dist = g_walk_time
            new_gnode.data['freq'] = 0.0
            gtree.add_child(gnode, new_gnode)
            eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))
            new_gnode.data['log'] = eventlog
        else:
            p = min(p, 1.0)  # sanity check
            eff_dr = dr * p
            eff_lr = lr * p
            eff_bothr = eff_dr + eff_lr
            event_time = stats.exponentialvariate(eff_bothr)
            remaining_s_dist = snode.dist - s_walk_time
            if event_time >= min(time_until_force, remaining_s_dist):
                # no D/L event before the next deadline
                if time_until_force < remaining_s_dist:
                    ## FREQUENCY UPDATE EVENT
                    newp = coal.sample_freq_CDF(p, popsize, forcetime)
                    # TODO: if time_until_force stops being reset at
                    #  speciation events, the time passed here will need
                    #  to change (probably using a new variable)
                    new_s_walk_time = s_walk_time + time_until_force
                    new_g_walk_time = g_walk_time + time_until_force
                    eventlog.append(
                        (new_g_walk_time, 'frequency', newp, snode.name))
                    # same gtree branch, so the log is carried along;
                    # time_until_force falls back to its default (reset)
                    sim_walk(gtree, snode, gnode, newp,
                             s_walk_time=new_s_walk_time,
                             g_walk_time=new_g_walk_time,
                             eventlog=eventlog)
                else:
                    ## SPECIATION EVENT
                    # the root is recognised by the last entry of its log
                    if gnode.data['log'][-1][1] != 'root':
                        newp = coal.sample_freq_CDF(p, popsize,
                                                    remaining_s_dist)
                        new_gnode = treelib.TreeNode(gtree.new_name())
                        new_g_walk_time = g_walk_time + remaining_s_dist
                        new_gnode.dist = new_g_walk_time
                        new_gnode.data['freq'] = newp
                        gtree.add_child(gnode, new_gnode)
                        if snode.is_leaf():
                            # end of the walk on this species branch
                            eventlog.append(
                                (new_g_walk_time, 'gene', newp, snode.name))
                            new_gnode.data['log'] = eventlog
                        else:
                            eventlog.append((new_g_walk_time, 'speciation',
                                             newp, snode.name))
                            new_gnode.data['log'] = eventlog
                            # TODO: if time_until_force stops being reset
                            #  at speciation events, update this call
                            for schild in snode.children:
                                sim_walk(gtree, schild, new_gnode, newp,
                                         eventlog=[])
                    else:
                        # gnode is the root: no new node is created, the
                        # event is appended to the root's own log
                        eventlog = gnode.data['log']
                        eventlog.append((0.0, 'speciation', p, snode.name))
                        gnode.data['log'] = eventlog
                        for schild in snode.children:
                            sim_walk(gtree, schild, gnode, p, eventlog=[])
            else:
                # a D/L event happens first
                # no WF updates for these events (modelling decision)
                new_s_walk_time = s_walk_time + event_time
                new_g_walk_time = g_walk_time + event_time
                new_time_until_force = time_until_force - event_time
                if event_is_dup(eff_dr, eff_bothr):
                    ## DUPLICATION EVENT
                    new_gnode = treelib.TreeNode(gtree.new_name())
                    new_gnode.dist = new_g_walk_time
                    new_gnode.data['freq'] = p
                    gtree.add_child(gnode, new_gnode)
                    eventlog.append(
                        (new_g_walk_time, 'duplication', p, snode.name))
                    new_gnode.data['log'] = eventlog
                    # remainder of the original branch; starts a new
                    # gtree branch, hence a new log and g_walk_time of 0
                    sim_walk(gtree, snode, new_gnode, p,
                             s_walk_time=new_s_walk_time,
                             time_until_force=new_time_until_force,
                             eventlog=[])
                    # duplicate copy; its log starts with a 'daughter'
                    # entry so daughters can be detected later
                    sim_walk(gtree, snode, new_gnode, freqdup,
                             s_walk_time=new_s_walk_time,
                             time_until_force=new_time_until_force,
                             eventlog=[(0.0, 'daughter', freqdup,
                                        snode.name)])
                else:
                    ## LOSS EVENT
                    newp = max(p - freqloss, 0.0)  # sanity check
                    eventlog.append(
                        (new_g_walk_time, 'loss', newp, snode.name))
                    # same gtree branch, so the log is carried along
                    sim_walk(gtree, snode, gnode, newp,
                             s_walk_time=new_s_walk_time,
                             g_walk_time=new_g_walk_time,
                             time_until_force=new_time_until_force,
                             eventlog=eventlog)
Exemple #4
0
    def sim_walk(gtree, snode, gnode, p, s_walk_time=0.0, g_walk_time=0.0,
                 time_until_force=forcetime):
        """
        Grow `gtree` below `gnode` by walking along species branch `snode`.

        gtree            -- tree being built; new nodes are added in place
        snode            -- species-tree node whose branch is being walked
        gnode            -- node of gtree that new nodes are attached under
        p                -- current frequency on the branch
        s_walk_time      -- distance already walked along the snode branch
        g_walk_time      -- distance walked since gnode was created
        time_until_force -- time left before a forced frequency resample

        One step is taken per call and the walk continues by recursion.
        A node is added to gtree on extinction (freq 0.0), at the end of
        a species branch, and at a duplication.  Returns None.
        """
        # extinction: record a zero-frequency node under gnode and stop
        if p <= 0.0:
            extinct_node = treelib.TreeNode(gtree.new_name())
            extinct_node.dist = g_walk_time
            extinct_node.data['freq'] = 0.0
            gtree.add_child(gnode, extinct_node)
            return

        p = min(p, 1.0)  # sanity check
        dup_rate = dr * p
        loss_rate = lr * p
        total_rate = dup_rate + loss_rate
        wait = stats.exponentialvariate(total_rate)
        species_left = snode.dist - s_walk_time

        if wait >= min(time_until_force, species_left):
            # no D/L event before the next deadline
            if time_until_force < species_left:
                # forced resample; forcetime is scaled from myr to years
                drifted = coal.sample_freq_CDF(p, popsize, forcetime * 1e6)
                # time_until_force is left to default, i.e. it resets
                sim_walk(gtree, snode, gnode, drifted,
                         s_walk_time=s_walk_time + time_until_force,
                         g_walk_time=g_walk_time + time_until_force)
                return

            # end of the species branch; remaining time scaled to years
            drifted = coal.sample_freq_CDF(p, popsize, species_left * 1e6)
            spec_node = treelib.TreeNode(gtree.new_name())
            spec_node.dist = g_walk_time + species_left
            spec_node.data['freq'] = drifted  # frequency at the speciation
            gtree.add_child(gnode, spec_node)
            for schild in snode.children:
                sim_walk(gtree, schild, spec_node, drifted)
            return

        # a D/L event happens first (no frequency resample for these events)
        s_now = s_walk_time + wait
        g_now = g_walk_time + wait
        force_left = time_until_force - wait

        if event_is_dup(dup_rate, total_rate):
            # duplication: add a node, then walk both copies below it
            dup_node = treelib.TreeNode(gtree.new_name())
            dup_node.dist = g_now
            dup_node.data['freq'] = p  # frequency at the duplication
            gtree.add_child(gnode, dup_node)
            # remainder of the original branch
            sim_walk(gtree, snode, dup_node, p,
                     s_walk_time=s_now,
                     time_until_force=force_left)
            # the duplicate, starting from freqdup
            sim_walk(gtree, snode, dup_node, freqdup,
                     s_walk_time=s_now,
                     time_until_force=force_left)
            return

        # loss: lower the frequency (floored at zero) and keep walking
        after_loss = max(p - freqloss, 0.0)
        sim_walk(gtree, snode, gnode, after_loss,
                 s_walk_time=s_now,
                 g_walk_time=g_now,
                 time_until_force=force_left)
Exemple #5
0
    def sim_walk(gtree, snode, gparent, p,
                 s_walk_time=0.0, remaining_steptime=steptime,
                 daughter=False):
        """
        Create one gtree branch under `gparent` and grow it along `snode`.

        gtree              -- tree being built; nodes are added in place
        snode              -- species-tree node whose branch is walked
        gparent            -- gtree node the new branch is attached under
        p                  -- frequency at the start of the branch
        s_walk_time        -- distance already walked along the snode branch
        remaining_steptime -- time left in the current frequency time step
        daughter           -- True when this branch is the new copy made by
                              a duplication; a 'daughter' entry is logged

        The branch grows until it goes extinct (p <= 0), duplicates, or
        reaches the end of the species branch; losses and time steps are
        logged along the way.  Child branches are created by recursion.

        The new node's data["log"] is a list of events on the branch.
        Each entry has the form
          (time_on_branch, event_type, frequency, species_node),

        where
           time_on_branch is the walk time along this gtree branch

        event_type is one of
           {'extinction', 'frequency', 'speciation', 'duplication',
            'loss', 'gene', 'daughter', 'root'},

        where 'root' is a unique event not added during the sim_walk process

        frequency is the branch frequency at the event time

        species_node is the name of the node of the species tree branch in
        which the event occurs

        Returns None.  Raises Exception if the loop ends with an event it
        does not recognise.
        """

        # create new node
        gnode = treelib.TreeNode(gtree.new_name())
        gtree.add_child(gparent, gnode)
        gnode.data = {"freq": p,
                      "log": []}
        eventlog = gnode.data["log"]
        g_walk_time = 0.0
        if daughter:
            eventlog.append((0.0, 'daughter', freqdup, snode.name))

        # grow this branch, determine next event
        while True:
            # Start every pass with no event.  Without this, a "loss"
            # from an earlier pass would still be set on a pass where no
            # dup/loss occurs, and a plain time step would be processed
            # (and logged) as another loss.
            event = None

            if p <= 0.0:
                event = "extinct"
                break

            # determine remaining time
            remaining_s_dist = snode.dist - s_walk_time
            remaining_time = min(remaining_steptime, remaining_s_dist)

            # sample next dup/loss event
            eff_duprate = duprate * p / freqdup
            eff_lossrate = lossrate * p / freqloss
            eff_bothrate = eff_duprate + eff_lossrate
            event_time = stats.exponentialvariate(eff_bothrate)

            # advance times
            time_delta = min(event_time, remaining_time)
            s_walk_time += time_delta
            g_walk_time += time_delta

            # sample new frequency
            p = coal.sample_freq_CDF(p, popsize, time_delta)

            # determine event
            if event_time < remaining_time:
                # dup/loss occurs
                if event_is_dup(eff_duprate, eff_bothrate):
                    # dup, stop growing
                    event = "dup"
                    break
                # loss, continue growing
                event = "loss"
            elif remaining_s_dist < remaining_steptime:
                # we are at a speciation, stop growing
                event = "spec"
                break

            # process step
            if event == "loss":
                # LOSS EVENT
                p = max(p - freqloss, 0.0)
                remaining_steptime -= time_delta
                eventlog.append((g_walk_time, 'loss', p, snode.name))
            else:
                # NEXT TIME STEP
                remaining_steptime = steptime
                eventlog.append((g_walk_time, 'frequency', p, snode.name))

        # process event
        if event == "extinct":
            # EXTINCTION EVENT (p <= 0)
            gnode.dist = g_walk_time
            gnode.data['freq'] = 0.0
            eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))

        elif event == "spec":
            # SPECIATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p

            # log the end of the species branch; recurse if not a leaf
            if snode.is_leaf():
                eventlog.append((g_walk_time, 'gene', p, snode.name))
            else:
                eventlog.append((g_walk_time, 'speciation', p, snode.name))
                for schild in snode.children:
                    sim_walk(gtree, schild, gnode, p)

        elif event == "dup":
            # DUPLICATION EVENT
            gnode.dist = g_walk_time
            gnode.data['freq'] = p
            eventlog.append((g_walk_time, 'duplication', p, snode.name))

            # recurse on mother
            sim_walk(gtree, snode, gnode, p,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime)

            # recurse on daughter
            sim_walk(gtree, snode, gnode, freqdup,
                     s_walk_time=s_walk_time,
                     remaining_steptime=remaining_steptime,
                     daughter=True)

        else:
            raise Exception("unknown event '%s'" % event)