def mutations():
    """
    Print the mean number of trees per replicate reported by msprime next
    to the mean number counted in the output of an external scrm run.

    A piecewise recombination map, given as ``(end_position, rate)`` pairs,
    is collapsed into a single length-weighted mean rate for msprime; the
    same map is handed to scrm through ``-sr`` switches. Everything is
    hard-coded, including the path to the scrm binary, so this is a
    developer scratch routine rather than a reusable API.
    """
    sample_size = 10
    # Alternative setting: 1000 replicates.
    replicates = 1
    total_loci = 10001
    # Alternative multi-segment map:
    # [(1000, 0.005), (2000, 0.01), (3000, 0), (10001, 0.05)]
    rate_map = [(10001, 0.05)]

    # Weight each segment's rate by the fraction of the locus it spans.
    weighted_rate = 0
    previous_end = 0
    for end, segment_rate in rate_map:
        fraction = (end - previous_end - 1) / (total_loci - 1)
        weighted_rate += fraction * segment_rate
        previous_end = end
    # The final segment has to end exactly at the last locus.
    assert previous_end == total_loci
    print("mean_rate = ", weighted_rate)

    msp_tree_total = 0
    for _ in range(replicates):
        sim = msprime.TreeSimulator(sample_size)
        sim.set_num_loci(total_loci)
        sim.set_scaled_recombination_rate(weighted_rate)
        sim.run()
        msp_tree_total += sim.get_num_breakpoints()
        tree_sequence = sim.get_tree_sequence()
        for tree in tree_sequence.trees():
            # Left coordinate of every tree in this replicate.
            print(tree.get_interval()[0])

    # Construct the scrm command line. Use the first value as the background
    # rate; the simulator from the last replicate is reused to build it.
    sim.set_scaled_recombination_rate(rate_map[0][-1])
    command = sim.get_ms_command_line(
        "/home/jk/work/wt/papers/msprime/simulators/scrm",
        num_replicates=replicates)
    # We still scale the recombination rate by the full locus length,
    # not the subset that we are working over.
    full_length = total_loci - 1
    # Each segment boundary switches scrm to the rate of the next segment.
    for (boundary, _), (_, next_rate) in zip(rate_map, rate_map[1:]):
        command += ["-sr", str(boundary), str(next_rate * full_length)]
    print(" ".join(command))

    output = subprocess.check_output(command)
    # Count output lines opening with "[" (presumably one per local tree in
    # scrm's output -- TODO confirm against the scrm output format).
    scrm_tree_total = sum(
        1 for row in output.splitlines() if row.startswith(b"["))
    print(msp_tree_total / replicates, scrm_tree_total / replicates)
def simulations():
    """
    Run one seeded msprime simulation under a fixed recombination map and
    print summary information about the resulting tree sequence.

    Prints the sum of the lengths yielded by ``ts.diffs()`` alongside
    ``ts.get_sequence_length()``, then each ``(length, newick)`` pair
    yielded by ``ts.newick_trees()``.
    """
    sample_size = 10
    num_loci = 1000
    rate_map = msprime.RecombinationMap(
        num_loci, [0, 0.5, 0.6, 0.7, 1], [0.1, 10, 0, 0.1, 0])

    simulator = msprime.TreeSimulator(sample_size)
    simulator.set_random_seed(1)
    simulator.set_num_loci(num_loci)
    simulator.set_recombination_map(rate_map)
    # Alternative: use a single uniform rate instead of the map, i.e.
    # simulator.set_scaled_recombination_rate(
    #     rate_map.get_total_recombination_rate())
    simulator.run()
    tree_seq = simulator.get_tree_sequence()

    # Total length covered by the diffs, shown next to the sequence length.
    covered = sum(length for length, _, _ in tree_seq.diffs())
    print("size", covered, tree_seq.get_sequence_length())

    # Walk every tree and read its interval; the values are not used.
    for tree in tree_seq.trees():
        left, right = tree.get_interval()

    for length, newick in tree_seq.newick_trees():
        print(length, newick)
def run_verify(args):
    """
    Checks that the distribution of events we get is the same as msprime.

    For each replicate ``j`` the local ``Simulator`` and msprime's
    ``TreeSimulator`` are both run with seed ``j``, and their
    recombination event counts are recorded. The two samples are then
    drawn as QQ plots and the figure is written to ``args.outfile``.

    ``args`` must provide ``sample_size``, ``num_loci``,
    ``recombination_rate``, ``num_replicates`` and ``outfile``.
    """
    sample_size = args.sample_size
    num_loci = args.num_loci
    recomb_rate = args.recombination_rate
    replicates = args.num_replicates

    msprime_counts = np.zeros(replicates)
    local_counts = np.zeros(replicates)
    for rep in range(replicates):
        # The local simulator draws from the global ``random`` module.
        random.seed(rep)
        local_sim = Simulator(sample_size, num_loci, recomb_rate, 10000)
        local_sim.simulate()
        local_counts[rep] = local_sim.num_re_events

        msp_sim = msprime.TreeSimulator(sample_size)
        msp_sim.set_num_loci(num_loci)
        msp_sim.set_scaled_recombination_rate(recomb_rate)
        msp_sim.set_random_seed(rep)
        msp_sim.run()
        msprime_counts[rep] = msp_sim.get_num_recombination_events()

    sm.graphics.qqplot(local_counts)
    sm.qqplot_2samples(local_counts, msprime_counts, line="45")
    pyplot.savefig(args.outfile, dpi=72)