def sim_step_indiv(N, time, dr=.0012, lr=.0011, Fd=1e-5, Fl=1e-5, pi=1e0):
    """
    Simulate duplications and losses acting on a single frequency.

    Events are drawn one after another from a Poisson process until the
    available time is used up.  Inputs are assumed to be sanitized already.

    N    -- population size (also passed on to coal.sample_freq_CDF)
    time -- length of the simulated period, in years
    dr   -- duplication rate in events/myr/individual
    lr   -- loss rate in events/myr/individual
    Fd   -- starting frequency handed to the recursive call made for each
            duplication
    Fl   -- frequency subtracted per loss
    pi   -- starting frequency

    Returns flatten() applied to the list holding the resampled starting
    frequency followed by the results of the recursive calls made for
    each duplication.
    """
    # rates arrive per million years; the clock below runs in years
    dup_per_year = dr / 1e6
    loss_per_year = lr / 1e6

    # population-wide rates   TODO: may need *N?
    pop_dup_rate = dup_per_year * pi * N
    pop_loss_rate = loss_per_year * pi * N
    total_rate = pop_dup_rate + pop_loss_rate

    freqs = [pi]
    loss_count = 0
    time_left = time

    while time_left > 0.0:
        wait = stats.exponentialvariate(total_rate)  # time to next event
        if wait > time_left:
            # the next event would fall beyond the simulated period
            break
        time_left -= wait

        # the event is a duplication with probability dup rate / total rate
        is_dup = random.random() <= pop_dup_rate / total_rate
        if not is_dup:
            loss_count += 1
            continue

        # a duplicate starts at frequency Fd and runs for the remaining time
        freqs.append(sim_step_indiv(N, time_left, dr, lr, Fd, Fl, Fd))
        debugprint(" Duplication")

    # every loss of the period is applied in one lump (approximation;
    # the original author flagged it as possibly poor)
    if loss_count > 0:
        freqs[0] = max(freqs[0] - loss_count * Fl, 0.0)
        debugprint(" Losses: " + str(loss_count))

    freqs[0] = coal.sample_freq_CDF(freqs[0], N, time)
    return flatten(freqs)  # recursive results are lists, hence the flatten
def sim_walk(node, p, walk_time=0.0, time_until_force=forcetime):
    """
    Walk along the branch above `node`, simulating duplications and losses.

    The module-level tree `stree` is edited in place: an extinct branch is
    removed, and a duplication inserts a new node whose children are the
    original subtree and a copy of it.

    node             -- node of `stree` whose incoming branch is walked
    p                -- current frequency; values >= 1.0 are clamped to 1.0
                        and values <= 0.0 are treated as extinction
    walk_time        -- distance already covered on this branch
    time_until_force -- time left until the next forced frequency resample

    Returns None after an extinction or a duplication; otherwise returns
    whatever the follow-up call (a recursive sim_walk or node.recurse)
    returns.
    """
    debugprint(" Sim on branch" + str(node.name) + " with frequency " + str(p) + " and walk time " + str(walk_time))

    # --- extinction: drop the subtree and clean up above it ---
    if p <= 0.0:
        debugprint(" Extinction on branch " + str(node.name))
        former_parent = node.parent
        stree.remove_tree(node)
        remove_duds(former_parent)
        return

    if p >= 1.0:  # sanity check
        p = 1.0

    dup_rate = dr * p    # * popsize #??
    loss_rate = lr * p   # * popsize #??
    total_rate = dup_rate + loss_rate
    wait = stats.exponentialvariate(total_rate)
    branch_left = node.dist - walk_time

    if wait >= min(time_until_force, branch_left):  # >= ok?
        # no D/L event before the next forced resample / end of branch
        if time_until_force < branch_left:
            # forced resample; forcetime is scaled by 1e6 (myr -> years)
            resampled = coal.sample_freq_CDF(p, popsize, forcetime * 1e6)
            debugprint(" Forced new frequency: " + str(resampled))
            ## TODO: may wish to log the resampled frequency in node.data
            # time_until_force is left at its default, i.e. it is reset
            return sim_walk(node, resampled,
                            walk_time=walk_time + time_until_force)

        # end of the branch; remaining length is scaled by 1e6 (myr -> years)
        resampled = coal.sample_freq_CDF(p, popsize, branch_left * 1e6)
        node.data['freq'] = resampled  # frequency at the speciation event
        debugprint(" Completed branch; new frequency: " + str(resampled))
        return node.recurse(sim_walk, resampled)

    # --- a D/L event happens first; no frequency resampling for these
    #     (modelling decision) ---
    elapsed = walk_time + wait
    force_left = time_until_force - wait

    if not event_is_dup(dup_rate, total_rate):
        # loss: lower the frequency and keep walking on the same branch
        reduced = p - freqloss
        debugprint(" Loss occurred at walk time " + str(elapsed) + " yielding new frequency " + str(reduced))
        return sim_walk(node, reduced, walk_time=elapsed,
                        time_until_force=force_left)

    # duplication: splice a new node between node.parent and node, and hang
    # a copy of node's subtree next to the original
    dup_node = treelib.TreeNode(stree.new_name())
    stree.add_child(node.parent, dup_node)
    dup_copy = treelib.subtree(stree, node)  # copy of node's subtree
    stree.remove(node)                       # pull node off of its parent
    stree.add_child(dup_node, node)          # original goes below the dup node
    stree.add_tree(dup_node, dup_copy)       # and so does the copy

    dup_node.dist = elapsed
    node.dist -= elapsed                     # remainder of the original branch
    dup_copy.root.dist = node.dist
    dup_node.data['freq'] = p                # frequency at the dup event
    debugprint(" Duplication occurred at walk time " + str(elapsed))

    # original lineage keeps p; the duplicate starts at freqdup
    sim_walk(node, p, time_until_force=force_left)
    sim_walk(dup_copy.root, freqdup, time_until_force=force_left)
    return
def sim_walk(gtree, snode, gnode, p, s_walk_time=0.0, g_walk_time=0.0,
             time_until_force=forcetime, eventlog=None):
    """
    Walk along species branch `snode`, growing `gtree` below `gnode` and
    recording every event in an event log.

    gtree            -- gene tree being built; new nodes are added to it
    snode            -- species-tree node whose branch is being walked
    gnode            -- gene-tree node under which new nodes are attached
    p                -- current frequency (clamped to at most 1.0;
                        <= 0.0 means extinction)
    s_walk_time      -- distance already covered on the species branch
    g_walk_time      -- distance covered on the gene branch being grown
    time_until_force -- time left until the next forced frequency update
    eventlog         -- log of events along the gene-tree branch so far;
                        a fresh list is used when omitted

    Each log entry has the form
        (time_on_branch, event_type, frequency, species_node)
    where
        0.0 <= time_on_branch <= branch_node.dist
        event_type is one of {'extinction', 'frequency', 'speciation',
            'duplication', 'loss', 'root', 'gene', 'daughter'}; 'root' is a
            unique event not added during the sim_walk process, and
            'daughter' opens the log of a duplicated lineage
        frequency is the branch frequency at the event time
        species_node is the name of the species-tree branch in which the
            event occurs

    Returns None; results are stored on the nodes added to `gtree`
    (data['freq'], data['log'], dist).
    """
    # A literal [] default would be created once and shared by every call
    # that omits the argument, leaking events between unrelated walks.
    if eventlog is None:
        eventlog = []

    if p <= 0.0:
        ## EXTINCTION EVENT
        # gnode is 'parent' of the extinct node
        new_gnode = treelib.TreeNode(gtree.new_name())
        new_gnode.dist = g_walk_time
        new_gnode.data['freq'] = 0.0
        gtree.add_child(gnode, new_gnode)
        eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))
        new_gnode.data['log'] = eventlog
        return

    p = min(p, 1.0)  # sanity check
    eff_dr = dr * p  # * popsize #??
    eff_lr = lr * p  # * popsize #??
    eff_bothr = eff_dr + eff_lr
    event_time = stats.exponentialvariate(eff_bothr)
    remaining_s_dist = snode.dist - s_walk_time

    if event_time >= min(time_until_force, remaining_s_dist):
        # no D/L event first; we are at a forced update or a speciation
        if time_until_force < remaining_s_dist:
            ## FREQUENCY UPDATE EVENT
            # (time is no longer scaled from myr to years here)
            newp = coal.sample_freq_CDF(p, popsize, forcetime)
            # TODO: if we decide not to reset time_until_force at
            # speciation events, the newp generation will need to be
            # altered in some form (probably using a new variable)
            new_s_walk_time = s_walk_time + time_until_force
            new_g_walk_time = g_walk_time + time_until_force
            eventlog.append((new_g_walk_time, 'frequency', newp, snode.name))
            # continue the walk; time_until_force falls back to its default
            sim_walk(gtree, snode, gnode, newp,
                     s_walk_time=new_s_walk_time,
                     g_walk_time=new_g_walk_time,
                     eventlog=eventlog)
            return

        ## SPECIATION EVENT
        if gnode.data['log'][-1][1] != 'root':
            # gnode is not the root
            # (time is no longer scaled from myr to years here)
            newp = coal.sample_freq_CDF(p, popsize, remaining_s_dist)
            new_gnode = treelib.TreeNode(gtree.new_name())
            new_g_walk_time = g_walk_time + remaining_s_dist
            new_gnode.dist = new_g_walk_time
            new_gnode.data['freq'] = newp
            gtree.add_child(gnode, new_gnode)

            if snode.is_leaf():
                # end of the walk on this species branch
                eventlog.append((new_g_walk_time, 'gene', newp, snode.name))
                new_gnode.data['log'] = eventlog
            else:
                eventlog.append(
                    (new_g_walk_time, 'speciation', newp, snode.name))
                new_gnode.data['log'] = eventlog
                # TODO: if we decide not to reset time_until_force at
                # speciation events, this sim_walk call will need updating
                for schild in snode.children:
                    sim_walk(gtree, schild, new_gnode, newp, eventlog=[])
        else:
            # gnode is the root: the speciation is recorded on the root's
            # own log and no new gene node is created
            gnode.data['log'].append((0.0, 'speciation', p, snode.name))
            for schild in snode.children:
                sim_walk(gtree, schild, gnode, p, eventlog=[])
        return

    # A D/L event happens first.
    # no WF updates for these events (modelling decision)
    new_s_walk_time = s_walk_time + event_time
    new_g_walk_time = g_walk_time + event_time
    new_time_until_force = time_until_force - event_time

    if event_is_dup(eff_dr, eff_bothr):
        ## DUPLICATION EVENT
        new_gnode = treelib.TreeNode(gtree.new_name())
        new_gnode.dist = new_g_walk_time
        new_gnode.data['freq'] = p
        gtree.add_child(gnode, new_gnode)
        eventlog.append((new_g_walk_time, 'duplication', p, snode.name))
        new_gnode.data['log'] = eventlog

        # Both lineages start a new gene branch below new_gnode, so
        # g_walk_time is left at its default of 0.0 and each gets its own log.
        # recurse on remainder of original branch
        sim_walk(gtree, snode, new_gnode, p,
                 s_walk_time=new_s_walk_time,
                 time_until_force=new_time_until_force,
                 eventlog=[])
        # recurse on dup tree with correct starting frequency; the
        # 'daughter' entry is there for daughter detection
        sim_walk(gtree, snode, new_gnode, freqdup,
                 s_walk_time=new_s_walk_time,
                 time_until_force=new_time_until_force,
                 eventlog=[(0.0, 'daughter', freqdup, snode.name)])
    else:
        ## LOSS EVENT
        newp = max(p - freqloss, 0.0)  # sanity check
        eventlog.append((new_g_walk_time, 'loss', newp, snode.name))
        sim_walk(gtree, snode, gnode, newp,
                 s_walk_time=new_s_walk_time,
                 g_walk_time=new_g_walk_time,
                 time_until_force=new_time_until_force,
                 eventlog=eventlog)
def sim_walk(gtree, snode, gnode, p, s_walk_time=0.0, g_walk_time=0.0,
             time_until_force=forcetime):
    """
    Walk along species branch `snode`, growing `gtree` below `gnode`.

    A new gene-tree node is created for every extinction, speciation and
    duplication; losses and forced frequency updates only change the
    frequency carried along the walk.

    gtree            -- gene tree being built; new nodes are added to it
    snode            -- species-tree node whose branch is being walked
    gnode            -- gene-tree node under which new nodes are attached
    p                -- current frequency (clamped to at most 1.0;
                        <= 0.0 means extinction)
    s_walk_time      -- distance already covered on the species branch
    g_walk_time      -- distance covered on the gene branch being grown
    time_until_force -- time left until the next forced frequency update

    Returns None; each new node gets its `dist` and data['freq'] set.
    """
    if p <= 0.0:
        # extinction: gnode becomes the parent of a frequency-0 node that
        # is meant to be pruned at the end
        extinct_node = treelib.TreeNode(gtree.new_name())
        extinct_node.dist = g_walk_time
        extinct_node.data['freq'] = 0.0
        gtree.add_child(gnode, extinct_node)
        return

    p = min(p, 1.0)      # sanity check
    dup_rate = dr * p    # * popsize #??
    loss_rate = lr * p   # * popsize #??
    total_rate = dup_rate + loss_rate
    wait = stats.exponentialvariate(total_rate)
    s_left = snode.dist - s_walk_time

    if wait >= min(time_until_force, s_left):
        # no D/L event before the forced update / the speciation
        if time_until_force < s_left:
            # forced update; forcetime is scaled by 1e6 (myr -> years)
            resampled = coal.sample_freq_CDF(p, popsize, forcetime * 1e6)
            ## TODO: may wish to log the resampled frequency in node.data
            # both walk times advance; time_until_force falls back to its
            # default, i.e. it is reset
            sim_walk(gtree, snode, gnode, resampled,
                     s_walk_time=s_walk_time + time_until_force,
                     g_walk_time=g_walk_time + time_until_force)
            return

        # speciation; remaining length is scaled by 1e6 (myr -> years)
        resampled = coal.sample_freq_CDF(p, popsize, s_left * 1e6)
        spec_node = treelib.TreeNode(gtree.new_name())
        spec_node.dist = g_walk_time + s_left
        spec_node.data['freq'] = resampled  # frequency at the speciation
        gtree.add_child(gnode, spec_node)
        for schild in snode.children:
            sim_walk(gtree, schild, spec_node, resampled)
        return

    # a D/L event happens first; no WF updates for these (modelling decision)
    s_elapsed = s_walk_time + wait
    g_elapsed = g_walk_time + wait
    force_left = time_until_force - wait

    if not event_is_dup(dup_rate, total_rate):
        # loss: lower the frequency (never below 0) and keep walking
        sim_walk(gtree, snode, gnode, max(p - freqloss, 0.0),
                 s_walk_time=s_elapsed,
                 g_walk_time=g_elapsed,
                 time_until_force=force_left)
        return

    # duplication: close the current gene branch with a new node
    dup_node = treelib.TreeNode(gtree.new_name())
    dup_node.dist = g_elapsed
    dup_node.data['freq'] = p  # frequency at the dup event
    gtree.add_child(gnode, dup_node)

    # both lineages start a new gene branch, so g_walk_time stays at its
    # default; the original keeps p and the duplicate starts at freqdup
    sim_walk(gtree, snode, dup_node, p,
             s_walk_time=s_elapsed,
             time_until_force=force_left)
    sim_walk(gtree, snode, dup_node, freqdup,
             s_walk_time=s_elapsed,
             time_until_force=force_left)
def sim_walk(gtree, snode, gparent, p, s_walk_time=0.0,
             remaining_steptime=steptime, daughter=False):
    """
    Grow one gene-tree branch below `gparent` along species branch `snode`.

    A new node is created for the branch and grown step by step until it
    ends in an extinction, a speciation or a duplication; the walk then
    recurses for the child branches.

    gtree              -- gene tree being built
    snode              -- species-tree node whose branch is being walked
    gparent            -- gene-tree node the new branch hangs from
    p                  -- frequency at the start of the branch
    s_walk_time        -- distance already covered on the species branch
    remaining_steptime -- time left in the current frequency time step
    daughter           -- True when this branch is the duplicated copy
                          created by a duplication event

    The new node stores data['freq'] and data['log'].  The event log is a
    log of events along the gtree branch.  Each entry has the form
        (time_on_branch, event_type, frequency, species_node)
    where
        0.0 <= time_on_branch <= branch_node.dist
        event_type is one of {'extinction', 'frequency', 'speciation',
            'duplication', 'loss', 'root', 'gene', 'daughter'}; 'root' is a
            unique event not added during the sim_walk process
        frequency is the branch frequency at the event time
        species_node is the name of the species-tree branch in which the
            event occurs

    Returns None.  Raises Exception if the growth loop ends with an
    unknown event.
    """
    # create new node
    gnode = treelib.TreeNode(gtree.new_name())
    gtree.add_child(gparent, gnode)
    gnode.data = {"freq": p, "log": []}
    eventlog = gnode.data["log"]
    g_walk_time = 0.0
    if daughter:
        eventlog.append((0.0, 'daughter', freqdup, snode.name))

    # grow this branch, determine next event
    while True:
        # Reset on every iteration.  Without this, a "loss" left over from
        # an earlier iteration would be processed again when the current
        # iteration merely reaches the end of a time step, subtracting
        # freqloss a second time and skipping the steptime reset.
        event = None

        if p <= 0.0:
            event = "extinct"
            break

        # determine remaining time
        remaining_s_dist = snode.dist - s_walk_time
        remaining_time = min(remaining_steptime, remaining_s_dist)

        # sample next dup/loss event
        eff_duprate = duprate * p / freqdup
        eff_lossrate = lossrate * p / freqloss
        eff_bothrate = eff_duprate + eff_lossrate
        event_time = stats.exponentialvariate(eff_bothrate)

        # advance times
        time_delta = min(event_time, remaining_time)
        s_walk_time += time_delta
        g_walk_time += time_delta

        # sample new frequency
        p = coal.sample_freq_CDF(p, popsize, time_delta)

        # determine event
        if event_time < remaining_time:
            # dup/loss occurs
            if event_is_dup(eff_duprate, eff_bothrate):
                # dup, stop growing
                event = "dup"
                break
            # loss, continue growing
            event = "loss"
        elif remaining_s_dist < remaining_steptime:
            # we are at a speciation, stop growing
            event = "spec"
            break

        # process step
        if event == "loss":
            # LOSS EVENT
            p = max(p - freqloss, 0.0)
            remaining_steptime -= time_delta
            eventlog.append((g_walk_time, 'loss', p, snode.name))
        else:
            # NEXT TIME STEP
            remaining_steptime = steptime
            eventlog.append((g_walk_time, 'frequency', p, snode.name))

    # process event
    if event == "extinct":
        # EXTINCTION EVENT (p <= 0)
        gnode.dist = g_walk_time
        gnode.data['freq'] = 0.0
        eventlog.append((g_walk_time, 'extinction', 0.0, snode.name))

    elif event == "spec":
        # SPECIATION EVENT
        gnode.dist = g_walk_time
        gnode.data['freq'] = p

        # add speciation event to event log and recurse on the children
        # of an internal species node
        if snode.is_leaf():
            eventlog.append((g_walk_time, 'gene', p, snode.name))
        else:
            eventlog.append((g_walk_time, 'speciation', p, snode.name))
            for schild in snode.children:
                sim_walk(gtree, schild, gnode, p)

    elif event == "dup":
        # DUPLICATION EVENT
        gnode.dist = g_walk_time
        gnode.data['freq'] = p
        eventlog.append((g_walk_time, 'duplication', p, snode.name))

        # recurse on mother
        sim_walk(gtree, snode, gnode, p,
                 s_walk_time=s_walk_time,
                 remaining_steptime=remaining_steptime)

        # recurse on daughter
        sim_walk(gtree, snode, gnode, freqdup,
                 s_walk_time=s_walk_time,
                 remaining_steptime=remaining_steptime,
                 daughter=True)

    else:
        raise Exception("unknown event '%s'" % event)