Exemple #1
0
def mutations():
    """
    Runs msprime and scrm over the same recombination map and prints the
    per-replicate averages of the counts obtained from each.

    The map is a list of ``(end_position, rate)`` pairs. msprime is run
    with a single rate: the weighted mean of the map's rates. scrm is
    run with the first rate as its background rate, plus one ``-sr``
    option for every subsequent segment of the map.

    Prints the mean rate, the left coordinate of every msprime tree, the
    scrm command line, and finally the two averages (msprime breakpoint
    count, then the count of scrm output lines starting with ``[``).
    """
    sample_size = 10
    # Alternative setting kept for reference: replicates = 1000
    replicates = 1
    num_loci = 10001
    # Alternative multi-segment map kept for reference:
    # [(1000, 0.005), (2000, 0.01), (3000, 0), (10001, 0.05)]
    recomb_rates = [(10001, 0.05)]

    # Both the segment weights and the scrm rates are scaled by this.
    span = num_loci - 1

    # Weighted mean of the per-segment rates.
    mean_rate = 0
    prev_end = 0
    for end, rate in recomb_rates:
        weight = (end - prev_end - 1) / span
        mean_rate += weight * rate
        prev_end = end
    # The final segment has to finish exactly at the last locus.
    assert prev_end == num_loci
    print("mean_rate = ", mean_rate)

    msp_total = 0
    for _ in range(replicates):
        sim = msprime.TreeSimulator(sample_size)
        sim.set_num_loci(num_loci)
        sim.set_scaled_recombination_rate(mean_rate)
        sim.run()
        msp_total += sim.get_num_breakpoints()
        tree_seq = sim.get_tree_sequence()
        for tree in tree_seq.trees():
            print(tree.get_interval()[0])

    # Build the scrm command line from the last simulator, using the
    # first rate in the map as the background rate.
    background_rate = recomb_rates[0][-1]
    sim.set_scaled_recombination_rate(background_rate)
    cmd = sim.get_ms_command_line(
        "/home/jk/work/wt/papers/msprime/simulators/scrm",
        num_replicates=replicates)
    # Each segment boundary switches scrm to the following segment's
    # rate. The rate is still scaled by the full locus length, not by
    # the length of the segment it applies to.
    segment_pairs = zip(recomb_rates, recomb_rates[1:])
    for (boundary, _), (_, next_rate) in segment_pairs:
        cmd += ["-sr", str(boundary), str(next_rate * span)]
    print(" ".join(cmd))

    output = subprocess.check_output(cmd)
    # Count the output lines that open with "[" (presumably one per
    # local tree -- confirm against scrm's output format).
    scrm_total = sum(
        1 for line in output.splitlines() if line.startswith(b"["))
    print(msp_total / replicates, scrm_total / replicates)
Exemple #2
0
def simulations():
    """
    Runs one seeded msprime simulation driven by a recombination map and
    prints a summary of the resulting tree sequence.

    Output is a ``size`` line showing the sum of the lengths yielded by
    ``ts.diffs()`` next to ``ts.get_sequence_length()``, followed by one
    line per entry of ``ts.newick_trees()``.
    """
    sample_size = 10
    num_loci = 1000
    map_positions = [0, 0.5, 0.6, 0.7, 1]
    map_rates = [0.1, 10, 0, 0.1, 0]
    recomb_map = msprime.RecombinationMap(
        num_loci, map_positions, map_rates)

    simulator = msprime.TreeSimulator(sample_size)
    simulator.set_random_seed(1)
    simulator.set_num_loci(num_loci)
    simulator.set_recombination_map(recomb_map)
    simulator.run()
    tree_seq = simulator.get_tree_sequence()

    # Add up the length attached to every diff so that it can be shown
    # beside the sequence length reported by the tree sequence.
    total_length = 0
    for length, _records_in, _records_out in tree_seq.diffs():
        total_length += length
    print("size", total_length, tree_seq.get_sequence_length())

    # Walk every tree and fetch its interval; the values are not used.
    for tree in tree_seq.trees():
        _left, _right = tree.get_interval()

    for length, newick in tree_seq.newick_trees():
        print(length, newick)
Exemple #3
0
def run_verify(args):
    """
    Checks that the distribution of recombination event counts produced
    by the local Simulator is the same as the one produced by msprime.

    Reads ``sample_size``, ``num_loci``, ``recombination_rate``,
    ``num_replicates`` and ``outfile`` from ``args``. For each replicate
    index, both simulators are seeded with that index and run once, and
    the number of recombination events from each is recorded. The two
    sets of counts are then drawn as Q-Q plots and the current figure is
    saved to ``args.outfile``.
    """
    sample_size = args.sample_size
    num_loci = args.num_loci
    rho = args.recombination_rate
    replicates = args.num_replicates
    msp_events = np.zeros(replicates)
    local_events = np.zeros(replicates)

    for seed in range(replicates):
        # Local simulator: seeded through the global random module.
        random.seed(seed)
        local_sim = Simulator(sample_size, num_loci, rho, 10000)
        local_sim.simulate()
        local_events[seed] = local_sim.num_re_events

        # msprime with the same parameters and the same seed value.
        msp_sim = msprime.TreeSimulator(sample_size)
        msp_sim.set_num_loci(num_loci)
        msp_sim.set_scaled_recombination_rate(rho)
        msp_sim.set_random_seed(seed)
        msp_sim.run()
        msp_events[seed] = msp_sim.get_num_recombination_events()

    sm.graphics.qqplot(local_events)
    # Two-sample Q-Q plot of local vs msprime counts with a 45 degree
    # reference line.
    sm.qqplot_2samples(local_events, msp_events, line="45")
    pyplot.savefig(args.outfile, dpi=72)