Beispiel #1
0
def plot_stairwayplot_coalrate(sp_infiles,
                               outfile,
                               model,
                               n_samp,
                               generation_time,
                               species,
                               pop_id=0,
                               steps=None):  # JRA
    """Plot stairwayplot Ne estimates against the coalescence-rate derived Ne.

    :param sp_infiles: Iterable of paths to tab-separated stairwayplot
        result files. The first five lines of each file are skipped and
        the ``year`` and ``Ne_median`` columns are plotted.
    :param outfile: Destination handed to ``Figure.savefig``.
    :param model: Object whose ``asdict()`` returns the keyword arguments
        for ``msprime.DemographyDebugger``.
    :param n_samp: Number of samples placed in population ``pop_id`` when
        computing the coalescence rate trajectory.
    :param generation_time: Multiplier applied to ``steps`` before they are
        used as x values (the axis is labelled in years).
    :param species: Label used in the figure super-title.
    :param pop_id: Index of the population being analysed.
    :param steps: Times at which the coalescence rate is evaluated. When
        ``None`` a grid from 1 to 10000 past the end of the second-to-last
        epoch is generated.
    """
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace needs an integer sample count; end_time may be a
        # float, which recent numpy versions reject with a TypeError.
        steps = np.linspace(1, end_time, int(end_time) + 1)
    num_samples = [0 for _ in range(ddb.num_populations)]
    num_samples[pop_id] = n_samp
    coal_rate, P = ddb.coalescence_rate_trajectory(
        steps=steps, num_samples=num_samples, double_step_validation=False)
    steps = steps * generation_time
    f, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(7, 7))
    for infile in sp_infiles:
        nt = pandas.read_csv(infile, sep="\t", skiprows=5)
        ax.plot(nt['year'], nt['Ne_median'], alpha=0.8)
    # Draw the reference curve once, after the estimates, so it stays on top.
    ax.plot(steps, 1 / (2 * coal_rate), c="black")
    ax.set_title("stairwayplot")
    plt.suptitle(f"{species}, population id {pop_id}", fontsize=16)
    ax.set(xscale="log", yscale="log")
    ax.set_xlabel("time (years ago)")
    reference_patch = mpatches.Patch(color='black',
                                     label='Coalescence rate derived Ne')
    ax.legend(frameon=False, fontsize=10, handles=[reference_patch])
    ax.set_ylabel("population size")
    # ``alpha`` is not a savefig option; newer matplotlib releases reject
    # unknown keyword arguments, so it is no longer passed.
    f.savefig(outfile, bbox_inches='tight')
Beispiel #2
0
def const0001(N_0,mig_prop,t_1,t_2,seq_length,recomb_rate,mut_rate,print_):
    """Simulate two samples from a single constant-size population.

    ``mig_prop``, ``t_1`` and ``t_2`` are only echoed to stdout; they do not
    influence the simulation (the printed messages say so explicitly).

    :param N_0: Size of the single population.
    :param mig_prop: Unused apart from being printed.
    :param t_1: Unused apart from being printed.
    :param t_2: Unused apart from being printed.
    :param seq_length: Passed to ``msprime.simulate`` as ``length``.
    :param recomb_rate: Passed as ``recombination_rate``.
    :param mut_rate: Passed as ``mutation_rate``.
    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate``.
    """
    print('N_0 is {}'.format(N_0))
    print('mig_prop is {}, but is irrelevant here'.format(mig_prop))
    print('t_1 is {} and t_2 is {}, but these are irrelevant here'.format(t_1,t_2))
    print('seq_length is {}'.format(seq_length))
    print('recomb_rate is {}'.format(recomb_rate))
    print('mut_rate is {}'.format(mut_rate))
    # The former ``N_B0 = mig_prop * 1e+04`` local was never read and made
    # the call fail for non-numeric ``mig_prop``; it has been dropped.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=N_0, growth_rate=0),
    ]
    demographic_events = []
    # The debugger is always constructed (as before) and only printed on
    # request.
    dd = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        demographic_events=demographic_events)
    if print_:
        print('Demographic history:\n')
        dd.print_history()
    sim = msprime.simulate(population_configurations=population_configurations,
                           demographic_events=demographic_events, length=seq_length, recombination_rate=recomb_rate,
                           mutation_rate=mut_rate)
    return sim
Beispiel #3
0
def zigzag_model(args, print=False):
    """Build the zigzag single-population model as a list of size changes.

    Derived model used by Schiffels and Durbin (2014) and Terhorst, Kamm,
    and Song (2017) with periods of exponential growth and decline in a
    single population. Here, growth rates are changed to pop sizes.
    Schiffels and Durbin, 2014. https://doi.org/10.1038/ng.3015

    :param args: 4-tuple ``(params, randomize, i, proposals)``. ``params``
        maps parameter names to objects exposing ``val``, ``inferable``,
        ``rand()`` and ``prop(i)``.
    :param print: When truthy, print the demographic history via
        ``msprime.DemographyDebugger``.
    :return: ``(demographic_events, mu, r)``.
    :raises KeyError: If a required parameter is missing from ``params``
        (after the diagnostic message has been printed).
    """
    # The ``print`` flag shadows the builtin inside this function, so the
    # real print function must be reached through the builtins module;
    # calling the flag itself raised "'bool' object is not callable".
    import builtins

    params, randomize, i, proposals = args
    necessary_params = [
        "mu",
        "r",
        "T1",
        "N1",
        "T2",
        "N2",
        "T3",
        "N3",
        "T4",
        "N4",
        "T5",
        "N5",
    ]
    for p in necessary_params:
        if p not in params:
            builtins.print(
                "Invalid combination of parameters. Needed: "
                "mu | r | T1 | N1 | T2 | N2 | T3 | N3 | T4 | N4 | T5 | N5"
            )

    if proposals:
        mu, r, T1, N1, T2, N2, T3, N3, T4, N4, T5, N5 = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        mu, r, T1, N1, T2, N2, T3, N3, T4, N4, T5, N5 = [
            params[p].rand() if randomize else params[p].val for p in necessary_params
        ]

    N0 = 71560
    n_ancient = N0 / 10
    t_ancient = 34133.318528

    demographic_events = [
        msprime.PopulationParametersChange(time=0, initial_size=N0, population_id=0),
        msprime.PopulationParametersChange(time=T1, initial_size=N1, population_id=0),
        msprime.PopulationParametersChange(time=T2, initial_size=N2, population_id=0),
        msprime.PopulationParametersChange(time=T3, initial_size=N3, population_id=0),
        msprime.PopulationParametersChange(time=T4, initial_size=N4, population_id=0),
        msprime.PopulationParametersChange(time=T5, initial_size=N5, population_id=0),
        msprime.PopulationParametersChange(
            time=t_ancient, initial_size=n_ancient, population_id=0
        ),
    ]

    if print:
        debugger = msprime.DemographyDebugger(Ne=N0, demographic_events=demographic_events)
        debugger.print_history()

    return demographic_events, mu, r
Beispiel #4
0
def constmig20201216():
    """Simulate two demes with a window of symmetric migration.

    Two lineages are sampled from population 0 and none from population 1;
    both populations have size 1e+04. Migration at rate 5e-05 is switched on
    in both directions at time 20000 and switched off at time 40000, where
    all lineages in population 0 are also moved into population 1. The
    demographic history is printed before simulating.

    :return: The value returned by ``msprime.simulate`` for a sequence of
        length 150e+06 with recombination and mutation rates of 2e-08.
    """
    # NOTE(review): an older note attached to this function quoted
    # mig_rate = 5e-04, while the code uses 5e-05 -- confirm the intent.
    size_a = 1e+04
    size_b = 1e+04
    rate_on = 5e-05
    t_on = 20000
    t_off = 40000
    n_bases = 150e+06

    pops = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=size_a, growth_rate=0),
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=size_b, growth_rate=0),
    ]
    no_migration = [[0, 0], [0, 0]]

    # Rate changes are emitted per time point, (0, 1) before (1, 0),
    # followed by the mass migration at the later time.
    events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((t_on, rate_on), (t_off, 0))
        for cell in ((0, 1), (1, 0))
    ]
    events.append(
        msprime.MassMigration(time=t_off, source=0, destination=1, proportion=1))

    debugger = msprime.DemographyDebugger(
        population_configurations=pops,
        migration_matrix=no_migration,
        demographic_events=events)
    print('Demographic history:\n')
    debugger.print_history()

    # The explicit migration matrix is only given to the debugger, not to
    # the simulation call.
    return msprime.simulate(
        population_configurations=pops,
        demographic_events=events,
        length=n_bases,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
Beispiel #5
0
def constmig_m5e05():
    """Simulate two demes exchanging migrants at rate 5e-05 for a while.

    Two lineages are sampled from population 0 and none from population 1;
    both have size 1e+04. Symmetric migration is switched on at time 2e+04
    and off at time 6e+04, where all lineages in population 1 are moved into
    population 0. The demographic history is printed before simulating.

    :return: The value returned by ``msprime.simulate`` for a sequence of
        length 150e+06 with recombination and mutation rates of 2e-08.
    """
    t_on = 2e+04
    t_off = 6e+04
    size_a = 1e+04
    size_b = 1e+04
    rate_on = 5e-05
    n_bases = 150e+06

    pops = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=size_a, growth_rate=0),
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=size_b, growth_rate=0),
    ]
    no_migration = [[0, 0], [0, 0]]

    # Same ordering as a hand-written list: both directions at the start
    # time, both directions at the stop time, then the merge.
    events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((t_on, rate_on), (t_off, 0))
        for cell in ((0, 1), (1, 0))
    ]
    events.append(
        msprime.MassMigration(time=t_off, source=1, destination=0, proportion=1))

    debugger = msprime.DemographyDebugger(
        population_configurations=pops,
        migration_matrix=no_migration,
        demographic_events=events)

    print('Demographic history:\n')
    debugger.print_history()

    # The migration matrix is deliberately passed to the debugger only.
    return msprime.simulate(
        population_configurations=pops,
        demographic_events=events,
        length=n_bases,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
Beispiel #6
0
def bottleneck_model(args, print=False):
    """Build a three-epoch, single-population size-change model.

    :param args: 4-tuple ``(params, randomize, i, proposals)``. ``params``
        maps parameter names to objects exposing ``val``, ``inferable``,
        ``rand()`` and ``prop(i)``.
    :param print: When truthy, print the demographic history via
        ``msprime.DemographyDebugger``.
    :return: ``(demographic_events, mu, r)``.
    :raises KeyError: If a required parameter is missing from ``params``
        (after the diagnostic message has been printed).
    """
    # The ``print`` flag shadows the builtin inside this function, so the
    # real print function must be reached through the builtins module;
    # calling the flag itself raised "'bool' object is not callable".
    import builtins

    params, randomize, i, proposals = args
    necessary_params = ["mu", "r", "N0", "T1", "N1", "T2", "N2"]

    for p in necessary_params:
        if p not in params:
            builtins.print(
                "Invalid combination of parameters. Needed: "
                "mu | r | N0 | T1 | N1 | T2 | N2"
            )

    if proposals:
        mu, r, N0, T1, N1, T2, N2 = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        mu, r, N0, T1, N1, T2, N2 = [
            params[p].rand() if randomize else params[p].val for p in necessary_params
        ]

    # Infer the 3 pop sizes, where N0 = N2
    demographic_events = [
        msprime.PopulationParametersChange(time=0, initial_size=N0),
        msprime.PopulationParametersChange(time=T1, initial_size=N1),
        msprime.PopulationParametersChange(time=T2, initial_size=N2),
    ]

    if print:
        debugger = msprime.DemographyDebugger(Ne=10000, demographic_events=demographic_events)
        debugger.print_history()

    return demographic_events, mu, r
Beispiel #7
0
def pscmatch20201214():
    """Simulate one population whose size doubles between two time points.

    Two lineages are sampled from a population of size 10000. At time 20000
    the size becomes 1e+04 * 2 and at time 40000 it becomes 1e+04. The
    demographic history is printed before simulating.

    :return: The value returned by ``msprime.simulate`` for a sequence of
        length 150e+06 with recombination and mutation rates of 2e-8.
    """
    present_size = 10000
    base_size = 1e+04
    factor = 2
    t_change = 20000
    t_revert = 40000
    n_bases = 150e+06

    # One shared keyword set keeps the debugger and the simulation in sync.
    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=present_size, growth_rate=0),
        ],
        migration_matrix=[[0]],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_change, initial_size=base_size * factor),
            msprime.PopulationParametersChange(
                time=t_revert, initial_size=base_size),
        ],
    )

    msprime.DemographyDebugger(**model).print_history()
    return msprime.simulate(
        length=n_bases,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        **model)
Beispiel #8
0
 def setUp(self):
     """Create the five-population test model and a debugger built from it."""
     model = _5pop_test_demog()
     self.model = model
     # Keyword values are read from the model in the same order as before.
     debugger_kwargs = {
         "demographic_events": model.demographic_events,
         "population_configurations": model.population_configurations,
         "migration_matrix": model.migration_matrix,
     }
     self.ddb = msprime.DemographyDebugger(**debugger_kwargs)
Beispiel #9
0
def double_m0002(N_0,mig_prop,T_1,T_2,seq_length,recomb_rate,mut_rate,print_):
    """Simulate one population whose size is rescaled between ``T_1`` and ``T_2``.

    Despite its name, ``mig_prop`` is the factor by which ``N_0`` is
    multiplied from time ``T_1`` until time ``T_2``, where the size returns
    to ``N_0``.

    :param N_0: Present-day and post-``T_2`` population size.
    :param mig_prop: Multiplier applied to ``N_0`` at ``T_1``.
    :param T_1: Time of the first size change.
    :param T_2: Time at which the size is set back to ``N_0``.
    :param seq_length: Passed to ``msprime.simulate`` as ``length``.
    :param recomb_rate: Passed as ``recombination_rate``.
    :param mut_rate: Passed as ``mutation_rate``.
    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate``.
    """
    sampled_pop = msprime.PopulationConfiguration(
        sample_size=2, initial_size=N_0, growth_rate=0)
    size_changes = [
        msprime.PopulationParametersChange(time=T_1, initial_size=N_0 * mig_prop),
        msprime.PopulationParametersChange(time=T_2, initial_size=N_0),
    ]
    model = dict(
        population_configurations=[sampled_pop],
        migration_matrix=[[0]],
        demographic_events=size_changes,
    )

    # The debugger is always built; its report is optional.
    debugger = msprime.DemographyDebugger(**model)
    if print_:
        debugger.print_history()

    return msprime.simulate(
        length=seq_length,
        recombination_rate=recomb_rate,
        mutation_rate=mut_rate,
        **model)
Beispiel #10
0
def struct0001(print_):
    """Simulate two demes joined by two successive mass migrations.

    Two lineages are sampled from population 0 and none from population 1;
    both have size 1e+04 and no continuous migration. At time 2e+04 all
    lineages move from population 0 to 1, and at time 3e+04 from 1 to 0.

    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate`` (length 3e+6,
        recombination and mutation rates 2e-8, ``random_seed=50``).
    """
    size_a = 1e+04
    size_b = 1e+04
    t_first = 2e+04
    t_second = 3e+04

    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=size_a, growth_rate=0),
            msprime.PopulationConfiguration(
                sample_size=0, initial_size=size_b, growth_rate=0),
        ],
        migration_matrix=[[0, 0], [0, 0]],
        demographic_events=[
            msprime.MassMigration(time=t_first, source=0, dest=1, proportion=1),
            msprime.MassMigration(time=t_second, source=1, dest=0, proportion=1),
        ],
    )

    # The debugger is always built; its report is optional.
    debugger = msprime.DemographyDebugger(**model)
    if print_:
        debugger.print_history()

    return msprime.simulate(
        length=3e+6,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        random_seed=50,
        **model)
Beispiel #11
0
def m0002(print_):
    """Simulate one population whose size doubles at time 20000.

    Two lineages are sampled from a population of size 10000; from time
    20000 onwards the size is 10000 * 2.

    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate`` (length 150e+6,
        recombination and mutation rates 2e-8).
    """
    size_now = 10000
    t_double = 20000

    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=size_now, growth_rate=0),
        ],
        migration_matrix=[[0]],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_double, initial_size=size_now * 2),
        ],
    )

    # The debugger is always built; its report is optional.
    debugger = msprime.DemographyDebugger(**model)
    if print_:
        debugger.print_history()

    return msprime.simulate(
        length=150e+6,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        **model)
Beispiel #12
0
def const_mig0002(print_):
    """Simulate two demes with symmetric migration at rate 4e-03 for a while.

    Two lineages are sampled from population 0 and none from population 1;
    both have size 1e+04. Migration is switched on in both directions at
    time 2e+04 and off at time 2.5e+04, where all lineages in population 0
    are also moved into population 1.

    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate`` (length 150e+06,
        recombination and mutation rates 2e-08).
    """
    size_a = 1e+04
    size_b = 1e+04
    t_on = 2e+04
    t_off = 2.5e+04
    rate_on = 4e-03

    pops = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=size_a, growth_rate=0),
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=size_b, growth_rate=0),
    ]
    no_migration = [[0, 0], [0, 0]]

    # Both directions at the start time, both at the stop time, then the
    # merge -- the same order as a hand-written list.
    events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((t_on, rate_on), (t_off, 0))
        for cell in ((0, 1), (1, 0))
    ]
    events.append(
        msprime.MassMigration(time=t_off, source=0, destination=1, proportion=1))

    debugger = msprime.DemographyDebugger(
        population_configurations=pops,
        migration_matrix=no_migration,
        demographic_events=events)
    if print_:
        print('Demographic history:\n')
        debugger.print_history()

    # The migration matrix is passed to the debugger only.
    return msprime.simulate(
        population_configurations=pops,
        demographic_events=events,
        length=150e+06,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
Beispiel #13
0
    def simulate_seqs(self):
        """Run the msprime simulation and refresh the summary statistics.

        Builds a two-population model (local and meta) with a single
        non-zero migration matrix entry, stores the simulation result in
        ``self.tree_sequence`` and then calls ``self.get_sumstats()``.
        """
        LOGGER.debug("Entering simulate_seqs - {}".format(self.name))
        ## TODO: Here we are assuming only one island and that the migration
        ## is only ever unidirectional from the mainland to the island
        mig_rate = self.stats["migration_rate"]
        migmat = [[0, mig_rate], [0, 0]]

        pop_local = self._get_local_configuration()
        pop_meta = self._get_meta_configuration()
        split_events = self._get_local_meta_split()

        demography = {
            "population_configurations": [pop_local, pop_meta],
            "migration_matrix": migmat,
            "demographic_events": split_events,
        }

        ## Only dump the demographic history when the effective log level is
        ## exactly 10; the output is very verbose.
        if LOGGER.getEffectiveLevel() == 10:
            msprime.DemographyDebugger(**demography).print_history()

        LOGGER.debug("Executing msprime - {}".format(self.name))
        self.tree_sequence = msprime.simulate(
            length=self.paramsdict["sequence_length"],
            mutation_rate=self.paramsdict["mutation_rate"],
            **demography)
        self.get_sumstats()
Beispiel #14
0
 def _demography_debug(self):
     """Print the demographic history implied by this object's settings."""
     debugger_kwargs = {
         "population_configurations": self.pop_config,
         "demographic_events": self.demography,
     }
     msp.DemographyDebugger(**debugger_kwargs).print_history()
Beispiel #15
0
def matching_CR(N_0,
                seq_length,
                mut_rate,
                recomb_rate,
                demographic_events_input,
                print_=True):
    """Simulate one population under a caller-supplied list of events.

    msprime model to implement population size changes; check the events
    against the model you are matching.

    :param N_0: Initial size of the single sampled population. This value
        is now honoured; it used to be overwritten with 10000.
    :param seq_length: Passed to ``msprime.simulate`` as ``length``.
    :param mut_rate: Passed as ``mutation_rate``.
    :param recomb_rate: Passed as ``recombination_rate``.
    :param demographic_events_input: Demographic events forwarded unchanged
        to the debugger and the simulation.
    :param print_: When truthy, print the demographic history first.
    :return: The value returned by ``msprime.simulate``.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=2,
                                        initial_size=N_0,
                                        growth_rate=0)
    ]
    migration_matrix = [[0]]
    demographic_events = demographic_events_input
    # The debugger is always constructed and only printed on request.
    dd = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    if print_:
        dd.print_history()
    sim = msprime.simulate(population_configurations=population_configurations,
                           migration_matrix=migration_matrix,
                           demographic_events=demographic_events,
                           length=seq_length,
                           recombination_rate=recomb_rate,
                           mutation_rate=mut_rate)
    return sim
def austin(preNE=10000,NE=100,postNE=5000,Nsamples=20,Tend=10,Tstart=25,debug=False):
    """Describe a single-population model with two size changes.

    The population starts (at time 0) with size ``postNE``, changes to
    ``NE`` at time ``Tend`` and to ``preNE`` at time ``Tstart`` (times in
    generations). The growth rate is 0.

    :param preNE: Size set at time ``Tstart``.
    :param NE: Size set at time ``Tend``.
    :param postNE: Initial size of the sampled population.
    :param Nsamples: Sample size of the population.
    :param Tend: Time of the first size change.
    :param Tstart: Time of the second size change.
    :param debug: When truthy, print the demographic history and return
        ``None`` instead of the model.
    :return: ``(population_configurations, migration_matrix,
        demographic_events)``, or ``None`` when ``debug`` is truthy.
    """
    growth = 0
    pops = [
        msprime.PopulationConfiguration(
            sample_size=Nsamples, initial_size=postNE, growth_rate=growth)
    ]
    mig = [[0]]
    # One size change per (time, size) pair, in the order Tend then Tstart.
    events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0)
        for when, size in ((Tend, NE), (Tstart, preNE))
    ]

    if not debug:
        return pops, mig, events

    msprime.DemographyDebugger(
        population_configurations=pops,
        migration_matrix=mig,
        demographic_events=events).print_history()
    return
Beispiel #17
0
 def print_debug(self, output):
     """Write the migration matrix and the demographic history to ``output``."""
     matrix_as_array = np.array(self.get_migration_matrix())
     print(matrix_as_array, file=output)
     # The migration matrix getter is called a second time here, exactly as
     # in the straight-line version of this method.
     debugger = msprime.DemographyDebugger(
         Ne=self.N_A,
         population_configurations=self.get_debug_configuration(),
         migration_matrix=self.get_migration_matrix(),
         demographic_events=self.get_demographic_events()
     )
     debugger.print_history(output)
Beispiel #18
0
 def debug(self, out_file=sys.stdout):
     """Print the demographic history of this model to ``out_file``.

     NOTE(review): the default stream is bound when the function is
     defined, so a later reassignment of ``sys.stdout`` is not picked up.
     """
     debugger_kwargs = {
         "population_configurations": self.population_configurations,
         "migration_matrix": self.migration_matrix,
         "demographic_events": self.demographic_events,
     }
     msprime.DemographyDebugger(**debugger_kwargs).print_history(out_file)
Beispiel #19
0
def plot_all_ne_estimates(sp_infiles,
                          smcpp_infiles,
                          msmc_infiles,
                          outfile,
                          model,
                          n_samp,
                          generation_time,
                          species,
                          pop_id=0,
                          steps=None):
    """Plot smc++, stairwayplot and msmc Ne estimates side by side.

    Every panel also shows the coalescence-rate derived Ne as a black
    reference curve. There is one panel for smc++, one for stairwayplot
    and one per distinct msmc sample size.

    :param sp_infiles: Paths to tab-separated stairwayplot files (first
        five lines skipped; columns ``year`` and ``Ne_median`` are used).
    :param smcpp_infiles: Paths to CSV files whose columns 1 and 2 are
        read and plotted as ``x`` and ``y``.
    :param msmc_infiles: Paths to CSV files read like the smc++ ones. The
        part of the base name before the first ``.`` must be an integer
        sample size.
    :param outfile: Destination handed to ``Figure.savefig``.
    :param model: Object whose ``asdict()`` returns the keyword arguments
        for ``msprime.DemographyDebugger``.
    :param n_samp: Number of samples placed in population ``pop_id`` when
        computing the coalescence rate trajectory.
    :param generation_time: Multiplier applied to ``steps`` before they are
        used as x values (the axis is labelled in years).
    :param species: Label used in the figure super-title.
    :param pop_id: Index of the population being analysed.
    :param steps: Times at which the coalescence rate is evaluated. When
        ``None`` a grid from 1 to 10000 past the end of the second-to-last
        epoch is generated.
    """
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace needs an integer sample count; end_time may be a
        # float, which recent numpy versions reject with a TypeError.
        steps = np.linspace(1, end_time, int(end_time) + 1)
    num_samples = [0 for _ in range(ddb.num_populations)]
    num_samples[pop_id] = n_samp
    coal_rate, P = ddb.coalescence_rate_trajectory(
        steps=steps, num_samples=num_samples, double_step_validation=False)
    steps = steps * generation_time
    # Distinct sample-size prefixes of the msmc file names, as sorted ints.
    num_msmc = {os.path.basename(infile).split(".")[0] for infile in msmc_infiles}
    num_msmc = sorted([int(x) for x in num_msmc])
    f, ax = plt.subplots(1,
                         2 + len(num_msmc),
                         sharex=True,
                         sharey=True,
                         figsize=(14, 7))
    for infile in smcpp_infiles:
        nt = pandas.read_csv(infile, usecols=[1, 2], skiprows=0)
        ax[0].plot(nt['x'], nt['y'], alpha=0.8)
    ax[0].plot(steps, 1 / (2 * coal_rate), c="black")
    ax[0].set_title("smc++")
    for infile in sp_infiles:
        nt = pandas.read_csv(infile, sep="\t", skiprows=5)
        ax[1].plot(nt['year'], nt['Ne_median'], alpha=0.8)
    ax[1].plot(steps, 1 / (2 * coal_rate), c="black")
    ax[1].set_title("stairwayplot")
    for i, sample_size in enumerate(num_msmc):
        for infile in msmc_infiles:
            fn = os.path.basename(infile)
            samp = fn.split(".")[0]
            if int(samp) == sample_size:
                nt = pandas.read_csv(infile, usecols=[1, 2], skiprows=0)
                ax[2 + i].plot(nt['x'], nt['y'], alpha=0.8)
        ax[2 + i].plot(steps, 1 / (2 * coal_rate), c="black")
        ax[2 + i].set_title(f"msmc, ({sample_size} samples)")
    plt.suptitle(f"{species}, population id {pop_id}", fontsize=16)
    for i in range(2 + len(num_msmc)):
        ax[i].set(xscale="log")
        ax[i].set_xlabel("time (years ago)")
    reference_patch = mpatches.Patch(color='black',
                                     label='Coalescence rate derived Ne')
    ax[0].legend(frameon=False, fontsize=10, handles=[reference_patch])
    ax[0].set_ylabel("population size")
    # ``alpha`` is not a savefig option; newer matplotlib releases reject
    # unknown keyword arguments, so it is no longer passed.
    f.savefig(outfile, bbox_inches='tight')
Beispiel #20
0
 def _demography_debug(self):
     """Print the demographic history, including the migration matrix."""
     debugger_kwargs = {
         "population_configurations": self.pop_config,
         "migration_matrix": self.migration_matrix,
         "demographic_events": self.demography,
     }
     debugger = msp.DemographyDebugger(**debugger_kwargs)
     debugger.print_history()
Beispiel #21
0
def two_bin(NA, N1, N2, Ts, M1, M2):
    """Simulate a two-population split model and print ms-like output.

    Population 0 (size ``N1``) is unsampled and population 1 (size ``N2``)
    contributes 50 samples. The migration matrix entry (0, 1) starts at
    ``M2`` and changes to ``M1`` at time ``Ts / 2``; at time ``Ts`` all
    lineages in population 1 move to population 0. Ten replicates of
    length 100000 are simulated and printed to stdout.

    :param NA: Passed to msprime as ``Ne``.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration.
    :param M1: Migration rate for entry (0, 1) from ``Ts / 2`` onwards.
    :param M2: Initial migration rate for entry (0, 1).
    """
    model = dict(
        Ne=NA,
        population_configurations=[
            msprime.PopulationConfiguration(sample_size=0, initial_size=N1),
            msprime.PopulationConfiguration(sample_size=50, initial_size=N2),
        ],
        migration_matrix=[[0, M2], [0, 0]],
        demographic_events=[
            msprime.MigrationRateChange(time=Ts / 2, rate=M1, matrix_index=(0, 1)),
            msprime.MassMigration(time=Ts, source=1, destination=0, proportion=1.0),
        ],
    )

    # The debugger is still constructed even though its report is not
    # printed.
    msprime.DemographyDebugger(**model)

    n_reps = 10
    length = 100000
    sims = msprime.simulate(
        mutation_rate=1e-7,
        recombination_rate=1e-8,
        length=length,
        num_replicates=n_reps,
        **model)

    # for MS like output (for msstats)
    for ts in sims:
        print("//")
        print("segsites: " + str(ts.get_num_mutations()))
        rel_positions = [mut.position / length for mut in ts.mutations()]
        print("positions: " + " ".join(map(str, rel_positions)))
        for hap in ts.haplotypes():
            print(hap)
Beispiel #22
0
 def test_demography_debugger_equal(self):
     """Each model's own debugger must print the same history as one built by hand."""
     for model in stdpopsim.all_demographic_models():
         from_model = model.get_demography_debugger()
         by_hand = msprime.DemographyDebugger(
             population_configurations=model.population_configurations,
             migration_matrix=model.migration_matrix,
             demographic_events=model.demographic_events)
         # Render both histories into in-memory buffers and compare text.
         buffers = (io.StringIO(), io.StringIO())
         for debugger, buf in zip((from_model, by_hand), buffers):
             debugger.print_history(buf)
         self.assertEqual(buffers[0].getvalue(), buffers[1].getvalue())
Beispiel #23
0
 def verify_debug(self, population_configurations, migration_matrix,
                  demographic_events):
     """Assert that the debugger prints a non-empty history for the model."""
     debugger = msprime.DemographyDebugger(
         population_configurations=population_configurations,
         migration_matrix=migration_matrix,
         demographic_events=demographic_events)
     with tempfile.TemporaryFile("w+") as scratch:
         debugger.print_history(scratch)
         # Rewind so the text just written can be read back.
         scratch.seek(0)
         debug_output = scratch.read()
     # TODO when there is better output, write some tests to
     # verify its format.
     self.assertGreater(len(debug_output), 0)
Beispiel #24
0
def nucleotide_diversity_ILS(paramsdict, tree, debug=False):
    """Simulate along ``tree`` and attach per-population diversity to its leaves.

    Each node without descendants becomes an msprime population with five
    samples and size ``node.Ne``. Each other node becomes a mass migration
    at ``node.height * 1e6`` generations, from the population of its first
    listed descendant into that of its second.

    NOTE(review): presumably the first two entries of ``get_descendants()``
    are the node's direct children -- confirm against the tree library.

    :param paramsdict: Mapping providing ``"sequence_length"`` and
        ``"mutation_rate"`` for the simulation.
    :param tree: Object exposing ``treenode`` with ``traverse`` and
        ``get_leaves``; its nodes gain an ``msprime_idx`` feature and its
        leaves a ``pi`` attribute.
    :param debug: When truthy, print progress and the demographic history.
    :return: ``(ts, tree)``.
    """
    generation_time = 1
    population_configurations = []
    demographic_events = []
    next_pop_idx = 0
    ## Walk the tree, turning tips into populations and other nodes into
    ## merge events.
    for node in tree.treenode.traverse("postorder"):
        descendants = node.get_descendants()
        if len(descendants) != 0:
            chidx = [d.msprime_idx for d in descendants]
            gens = node.height * 1e6 / generation_time
            demographic_events.append(
                msprime.MassMigration(time=gens,
                                      source=chidx[0],
                                      dest=chidx[1]))
            # The merged lineage lives on in the destination population.
            node.add_feature("msprime_idx", chidx[1])
            if debug:
                print("I'm a node {} ({}) [gens {}]- {}".format(
                    node.idx, node.msprime_idx, gens,
                    " ".join(map(str, chidx))))
            continue

        population_configurations.append(
            msprime.PopulationConfiguration(sample_size=5,
                                            initial_size=node.Ne))
        node.add_feature("msprime_idx", next_pop_idx)
        if debug:
            print("I'm a tip {} - {}".format(node.idx, node.msprime_idx))
        next_pop_idx += 1

    ## Events must be ordered by time
    demographic_events.sort(key=lambda event: event.time)

    model = dict(population_configurations=population_configurations,
                 demographic_events=demographic_events)

    if debug:
        msprime.DemographyDebugger(**model).print_history()

    ## Do the simulation
    ts = msprime.simulate(length=paramsdict["sequence_length"],
                          mutation_rate=paramsdict["mutation_rate"],
                          **model)

    # One sample set per population, in population order.
    sample_sets = [ts.samples(pop.id) for pop in ts.populations()]
    pis = ts.diversity(sample_sets)

    for pi, leaf in zip(pis, tree.treenode.get_leaves()):
        leaf.pi = pi

    return ts, tree
Beispiel #25
0
    def get_demography_debugger(self):
        """
        Builds an :class:`msprime.DemographyDebugger` from this model's
        population configurations, migration matrix and demographic events.
        See the msprime documentation for how to use the returned object.

        :return: A DemographyDebugger instance for this DemographicModel.
        :rtype: msprime.DemographyDebugger
        """
        debugger_kwargs = {
            "population_configurations": self.population_configurations,
            "migration_matrix": self.migration_matrix,
            "demographic_events": self.demographic_events,
        }
        return msprime.DemographyDebugger(**debugger_kwargs)
Beispiel #26
0
def mad_1():
    """Simulate a two-population split per chromosome and write VCF files.

    For every index of the module-level ``muts`` sequence, two populations
    (252 and 16 samples) are simulated with a mass migration from
    population 0 into population 1 at ``splitTime``. Each replicate is
    written to its own VCF file. Relies on the module-level names ``muts``,
    ``splitTime``, ``repl``, ``winSize`` and ``iterate``.
    """
    for chrom in range(len(muts)):
        print(chrom)

        # Population IDs correspond to their indexes in the population configuration array.
        # 0: Cvi, 1: High Atlas
        start_size = 5000
        start_growth = 0.0
        model = dict(
            population_configurations=[
                msprime.PopulationConfiguration(sample_size=252,
                                                initial_size=start_size,
                                                growth_rate=start_growth),
                msprime.PopulationConfiguration(sample_size=16,
                                                initial_size=1e5,
                                                growth_rate=0.0),
            ],
            migration_matrix=[[0, 0], [0, 0]],
            demographic_events=[
                msprime.MassMigration(time=splitTime,
                                      source=0,
                                      destination=1,
                                      proportion=1.0),
            ],
        )

        # Print the demographic history; Ne is given to the debugger only.
        msprime.DemographyDebugger(Ne=start_size, **model).print_history()

        replicates = msprime.simulate(
            mutation_rate=muts[chrom],
            recombination_rate=1.8e-8,
            length=winSize,
            num_replicates=repl,
            **model)
        for i, tree_sequence in enumerate(replicates):
            vcf_path = ("Path_to_working_directory/vcf/cvi_winToHa" +
                        str(splitTime) + "_Chr" + str(chrom) + "_" +
                        str(iterate) + "_" + str(i) + ".vcf")
            with open(vcf_path, "w") as vcf_file:
                tree_sequence.write_vcf(vcf_file, 1)
Beispiel #27
0
def popn_coal_rate(model, pop_id, n_samp, generation_time, steps=None):
    """
    Compute the coalescence-rate trajectory of one population of a model.

    :param model: Object whose ``asdict()`` returns the keyword arguments
        accepted by ``msprime.DemographyDebugger``.
    :param int pop_id: Index of the population that receives all samples;
        every other population gets zero samples.
    :param int n_samp: Number of samples placed in ``pop_id``.
    :param generation_time: Factor the time points are multiplied by before
        being returned (presumably years per generation).
    :param steps: Time points at which the trajectory is evaluated. When
        omitted, a unit-spaced grid from 1 up to 10000 past the end of the
        second-to-last epoch is used.
    :return: Tuple ``(coal_rate, P, steps)``: the two arrays returned by
        ``DemographyDebugger.coalescence_rate_trajectory`` followed by the
        time points scaled by ``generation_time``.
    """
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace requires an integer number of points, while epoch end
        # times are generally floats.
        steps = np.linspace(1, end_time, int(end_time + 1))
    num_samples = [0] * ddb.num_populations
    num_samples[pop_id] = n_samp
    coal_rate, P = ddb.coalescence_rate_trajectory(
        steps=steps, num_samples=num_samples, double_step_validation=False)
    # asarray keeps the scaling element-wise even when the caller supplied a
    # plain list of steps.
    steps = np.asarray(steps) * generation_time
    return coal_rate, P, steps
Beispiel #28
0
def one_bin(NA, N1, N2, Ts, M):
    """
    Simulate a two-population model and summarise between-population diversity.

    Two populations of 50 samples each exchange migrants symmetrically at
    rate ``M`` until time ``Ts``, when all lineages of population 1 are moved
    into population 0. One replicate of a 100 kb sequence is simulated.

    :param NA: Value passed as ``Ne`` to msprime.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration from population 1 into population 0.
    :param M: Migration rate used in both directions.
    :return: Variance of the number of differing sites over all pairs made of
        one haplotype from population 0 and one from population 1.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=50, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2),
    ]
    migration_matrix = [[0, M], [M, 0]]
    demographic_events = [
        msprime.MassMigration(time=Ts, source=1, destination=0, proportion=1.0)
    ]

    # Not used by the computation below; handy for inspecting the model with
    # dp.print_history() while debugging.
    dp = msprime.DemographyDebugger(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    replicates = 1
    length = 100000
    sim = msprime.simulate(Ne=NA,
                           population_configurations=population_configurations,
                           migration_matrix=migration_matrix,
                           demographic_events=demographic_events,
                           mutation_rate=1e-7,
                           recombination_rate=1e-8,
                           length=length,
                           num_replicates=replicates)

    pairwise_diff = []
    for s in sim:
        s0 = len(s.get_samples(0))
        s1 = len(s.get_samples(1))
        haps = list(s.haplotypes())
        # Assumes haplotypes are ordered population by population (population
        # 0 first), as the original slicing did.
        h0 = haps[:s0]
        # Slice ends are exclusive, so the upper bound is s0 + s1; the former
        # "s0 + s1 - 1" silently dropped the last haplotype of population 1.
        h1 = haps[s0:s0 + s1]

        for hap0 in h0:
            for hap1 in h1:
                # Number of sites at which the two haplotypes differ.
                pairwise_diff.append(
                    sum(a != b for a, b in zip(hap0, hap1)))

    return np.var(np.array(pairwise_diff))
Beispiel #29
0
def plot_rainbow(input, output, g=30, model=None, steps=None, pop_id=1):
    """
    Creates a plot of all iterations by colour. This plot is useful for assessing convergence.

    :param str input: The full file path to an aggregated output file from :code:`smcsmc`.
        It is read as whitespace-separated and must provide the columns
        ``Start``, ``Ne`` and ``Iter``.
    :param str output: Filepath to save the plot.
    :param int g: The length of one generation in years. This is used to scale the x axis.
    :param model: Optional model to overlay in black. Either the string
        ``"ooa"`` (stdpopsim's ``GutenkunstThreePopOutOfAfrica``) or an object
        whose ``asdict()`` gives the arguments of ``msprime.DemographyDebugger``.
    :param steps: Time points (in generations) at which the model's
        coalescence rate is evaluated. Defaults to 31 log-spaced points.
    :param int pop_id: If your model includes multiple populations, which one do you want to plot?

    .. figure:: ../img/rainbow.png
        :align: center
    """

    f, ax = plt.subplots(figsize=(7, 7))
    ax.set(xscale="log", yscale="log")
    ax.set_ylim([10e2, 10e4])
    ax.set_xlim([1e3, 1e6])

    if model == "ooa":
        if stdpopsim is not None:
            model = stdpopsim.homo_sapiens.GutenkunstThreePopOutOfAfrica()
        else:
            print("Module stdpopsim not available - cannot go on.")
            sys.exit(1)

    if model is not None:
        import msprime  # needs to be installed.
        ddb = msprime.DemographyDebugger(**model.asdict())
        if steps is None:
            end_time = ddb.epochs[-2].end_time + 10000
            steps = np.exp(np.linspace(1, np.log(end_time), 31))
        num_samples = [0] * ddb.num_populations
        num_samples[pop_id] = 20
        coal_rate, P = ddb.coalescence_rate_trajectory(
            steps=steps, num_samples=num_samples, double_step_validation=False)
        # asarray keeps the scaling element-wise if a list of steps was given.
        years = np.asarray(steps) * g
        ax.plot(years, 1 / (2 * coal_rate), c="black", drawstyle='steps-pre')

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    nt = pd.read_csv(input, sep=r'\s+')
    nt['Start'] *= g
    # Group by the bare column name so the keys (used as legend labels) are
    # scalars rather than 1-tuples, and use loop names that do not shadow
    # the ``g`` parameter.
    for iteration, iter_rows in nt.groupby('Iter'):
        iter_rows.plot(x='Start', y='Ne', ax=ax, drawstyle='steps-pre',
                       label=iteration)
    ax.legend(title='EM Iterations', ncol=5)
    f.savefig(output)
Beispiel #30
0
 def debugger(self):
     """ A debugger to run before the simulation. """
     # SLiM debugging
     slim = self.slim_script
     print('\nSLiM input file:', slim)
     # Test 1: is an output file saved?
     with open(slim ,'r') as f:
         lines = f.readlines()
         string_pre = ".treeSeqOutput("
         string_post = ")"
         ind = 0
         for line in lines:
             if string_pre in line and string_post in line:
                 out_file = line.split(string_pre)[1].split(string_post)[0]
                 self.slim_out = out_file.strip('""')
                 print('SLiM output file:', self.slim_out)
                 ind = 1
         if ind == 0:
             print(
 """SLiM error:
 Oh no, your script does not produce a .trees file!
 Please ensure you include a call to 'treeSeqOutput()' at the end of your script.
                 """)
     # Test 2: subsampling
     if self.populations is not None or self.sample_sizes is not None:
         if len(self.populations) != len(self.sample_sizes):
             print(
 """ Subsampling error:
 The list of populations to sample from must have the same length
 as the list of sample sizes."""
 )
         print("We are sampling:")
         for ind in range(len(self.populations)):
             print("-", self.sample_sizes[ind], "individuals from population", 
                 self.populations[ind])
     else:
         "No subsampling will be performed."
     # Test 3: demography debugging in recapitation
     print('Ancient demography:')
     dd = msprime.DemographyDebugger(
         population_configurations=self.ancient_population_configurations,
         demographic_events=self.ancient_demographic_events)
     dd.print_history()
     # Test 4: Adding variation
     # Neutral mutations
     print('Neutral mutation rate:', self.neutral_mutation_rate)