Beispiel #1
0
def set_up_demography(t78, t68, t85, t54, t10, t20, t03, t93, t34, t410, ta1, ta2, ta3, ta4, f):
    """Build the complete, time-ordered list of demographic events.

    Three groups of events are created and merged:

    * ten mass migrations with ``proportion=1`` (the tree topology), at
      times ``t78`` ... ``t410``;
    * four admixture pulses at times ``ta1`` ... ``ta4`` whose proportions
      are read from ``f``;
    * two size changes of ``CEU`` at the fixed times 2400 and 2800.

    Args:
        t78, t68, t85, t54, t10, t20, t03, t93, t34, t410: Times of the
            proportion-1 mass migrations.
        ta1, ta2, ta3, ta4: Times of the admixture pulses.
        f: Indexable collection of admixture proportions; indices 0, 1 and
            2 are read.

    Returns:
        A new list holding every event, sorted by ascending ``time``.
    """
    # Divergence of source populations: (time, source, destination).
    tree_merges = [
        (t78, PAP, AYT),
        (t68, EAS, AYT),
        (t85, AYT, CEU),
        (t54, CEU, AFR),
        (t10, DEN1, DEN0),
        (t20, DEN2, DEN0),
        (t03, DEN0, DEN3),
        (t93, NEA, DEN3),
        (t34, DEN3, AFR),
        (t410, AFR, CHM),
    ]
    events = [
        msp.MassMigration(time=when, source=src, destination=dst, proportion=1)
        for when, src, dst in tree_merges
    ]

    # Admixture pulses: (time, source, destination, proportion).
    # NOTE(review): f[2] is used for both the Denisovan 2 and the Neanderthal
    # pulse -- confirm this is intended rather than a separate fraction.
    pulses = [
        (ta1, AYT, DEN2, f[2]),  # fraction from Denisovan 2 to Ayta
        (ta2, AYT, DEN1, f[1]),  # fraction from Denisovan 1 to Ayta/Papuan
        (ta3, CEU, NEA, f[2]),  # fraction from Neanderthal to OoA pops
        (ta4, AYT, EAS, f[0]),  # fraction from East Asian 2 to Ayta
    ]
    for when, src, dst, fraction in pulses:
        events.append(
            msp.MassMigration(time=when, source=src, destination=dst, proportion=fraction)
        )

    # Population size changes for CEU at fixed times.
    for when, size in ((2400, 2000), (2800, 5000)):
        events.append(
            msp.PopulationParametersChange(
                time=when, initial_size=size, growth_rate=0, population_id=CEU
            )
        )

    # Stable sort, so events sharing a time keep their construction order.
    events.sort(key=lambda event: event.time)
    return events
Beispiel #2
0
def exponential_model(args, print=False):
    """Build the demographic events of a single-population, three-epoch model.

    The population has size ``N0`` at time 0 and changes exponentially at
    rate ``growth`` until ``T1``, where the size is set to ``N1`` with growth
    rate 0; at ``T2`` the size is set to ``N2``.  ``N0`` is derived so that
    ``N0 * exp(-growth * T1) == N1``.

    Args:
        args: 4-tuple ``(params, randomize, i, proposals)``.  ``params`` maps
            parameter names to objects exposing ``val``, ``inferable``,
            ``rand()`` and ``prop(i)``.  When ``proposals`` is truthy the
            value of every inferable parameter is ``prop(i)``; otherwise
            ``rand()`` is used if ``randomize`` is truthy, else ``val``.
        print: When truthy, print the demographic history through
            ``msprime.DemographyDebugger``.

    Returns:
        Tuple ``(demographic_events, mu, r)``.

    Raises:
        KeyError: If ``params`` lacks one of the required names (a warning
            is printed first).
    """
    # The ``print`` parameter shadows the builtin inside this function, so the
    # real print function has to be reached through the ``builtins`` module.
    import builtins

    params, randomize, i, proposals = args
    necessary_params = ["mu", "r", "T1", "N1", "T2", "N2", "growth"]
    if any(p not in params for p in necessary_params):
        builtins.print(
            "Invalid combination of parameters. "
            "Needed: mu | r | T1 | N1 | T2 | N2 | growth"
        )

    if proposals:
        mu, r, T1, N1, T2, N2, growth = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        mu, r, T1, N1, T2, N2, growth = [
            params[p].rand() if randomize else params[p].val for p in necessary_params
        ]

    # Present-day size implied by growing at ``growth`` up to size N1 at T1.
    N0 = N1 / math.exp(-growth * T1)

    # Time is given in generations unit (t/25)
    demographic_events = [
        msprime.PopulationParametersChange(time=0, initial_size=N0, growth_rate=growth),
        msprime.PopulationParametersChange(time=T1, initial_size=N1, growth_rate=0),
        msprime.PopulationParametersChange(time=T2, initial_size=N2),
    ]

    if print:
        debugger = msprime.DemographyDebugger(Ne=N0, demographic_events=demographic_events)
        debugger.print_history()

    return demographic_events, mu, r
Beispiel #3
0
    def __init__(self):
        """Set up a single-population model with three constant-size epochs.

        Populates ``population_configurations``, ``migration_matrix`` and
        ``demographic_events``.
        """
        # Model from paper https://doi.org/10.1371/journal.pcbi.1004845
        # Sizes come from table 7 (average stat prediction values, which were
        # generally stated to be the best).
        size_recent = 544.2e3
        size_bottleneck = 145.3e3
        size_ancestral = 652.7e3

        # Epoch boundaries are given in coalescent units (simulating-data
        # section, based on PSMC) and converted to generations with the
        # baseline effective population size.
        baseline_size = 1e5
        recent_span_coal = 0.5
        bottleneck_span_coal = 5
        bottleneck_start = recent_span_coal * 4 * baseline_size
        ancestral_start = (recent_span_coal + bottleneck_span_coal) * 4 * baseline_size

        # Population size at T=0.
        present_day = msprime.PopulationConfiguration(
            initial_size=size_recent, growth_rate=0
        )
        self.population_configurations = [present_day]

        # One population only, so there is no migration.
        self.migration_matrix = [[0]]

        # Size changes, listed backwards in time: (start time, new size).
        size_changes = (
            (bottleneck_start, size_bottleneck),  # bottleneck
            (ancestral_start, size_ancestral),  # ancestral size
        )
        self.demographic_events = [
            msprime.PopulationParametersChange(
                time=start, initial_size=size, population_id=0
            )
            for start, size in size_changes
        ]
Beispiel #4
0
 def configure_demography(self):
     """Translate ``self.demography`` into msprime size-change events.

     Resets ``self.demographic_events`` and ``self.pc``, then appends one
     ``PopulationParametersChange`` (growth rate 0) for every row label of
     ``self.demography`` until the label equal to ``shape[0] - 1`` is
     reached.  The ``generation`` and ``size`` columns supply time and size.
     """
     self.demographic_events = []
     self.pc = [msprime.PopulationConfiguration(self.sample_size)]
     generations = self.demography['generation']
     sizes = self.demography['size']
     final_label = self.demography.shape[0] - 1
     for label in self.demography.index:
         if label == final_label:
             # The final row has no successor to pair with.
             break
         next_time = generations[label + 1]
         next_size = sizes[label + 1]
         this_time = generations[label]
         this_size = sizes[label]
         # NOTE(review): this rate between consecutive rows is computed but
         # never used; every event below is created with growth_rate=0.
         g = (math.log(this_size) - math.log(next_size)) / (next_time - this_time)
         step = msprime.PopulationParametersChange(this_time,
                                                   this_size,
                                                   growth_rate=0)
         self.demographic_events.append(step)
Beispiel #5
0
def mazet2016_3b(print_):
    """Simulate two samples under a four-epoch piecewise-constant history.

    Args:
        print_: When truthy, print the demographic history before simulating.

    Returns:
        The result of ``msprime.simulate`` for a 150e6-long sequence with
        recombination and mutation rates of 2e-08.
    """
    N_0 = 10000

    # Initial state: one population of size N_0, two samples.
    pop_configs = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=N_0, growth_rate=0),
    ]

    # (time, size) of each backwards-in-time size change.
    epochs = [
        (10000, N_0 / 10),
        (60000, N_0),
        (200000, N_0 / 2),
    ]
    size_changes = [
        msprime.PopulationParametersChange(time=when, initial_size=size, growth_rate=0)
        for when, size in epochs
    ]

    # The debugger is always built; its output is only shown on request.
    debugger = msprime.DemographyDebugger(
        population_configurations=pop_configs,
        demographic_events=size_changes)
    if print_:
        print('Demographic history:\n')
        debugger.print_history()

    return msprime.simulate(
        population_configurations=pop_configs,
        demographic_events=size_changes,
        length=150e+6,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
Beispiel #6
0
 def test_growth_rate_and_size_change(self):
     """An event setting both size and growth rate exposes both low-level fields."""
     when = 1024
     rate = 2
     size = 8192
     expected = dict(
         type="population_parameters_change",
         time=when,
         population_id=1,
         initial_size=size,
         growth_rate=rate,
     )
     change = msprime.PopulationParametersChange(
         time=when, initial_size=size,
         growth_rate=rate, population_id=1)
     self.assertEqual(change.get_ll_representation(1), expected)
def split_times(split_time):
    """Return the demographic events for a given first split time.

    Args:
        split_time: Split time, divided by the module-level ``gen_time`` to
            obtain the time of the first mass migration.

    Returns:
        List of four events.  Only the first event's time depends on
        ``split_time``; ``T2``, ``T3`` and ``T4`` are module-level names.
    """
    first_join = split_time / gen_time

    # joining DIN to NGS
    din_to_ngs = msprime.MassMigration(
        time=first_join, source=0, destination=1, proportion=1)
    # joining DIN with BSJ
    din_to_bsj = msprime.MassMigration(
        time=T2, source=1, destination=2, proportion=1)
    # bottleneck for CHW
    chw_bottleneck = msprime.PopulationParametersChange(
        time=T3, initial_size=13000, population_id=3)
    # joining CHW with BSJ
    bsj_to_chw = msprime.MassMigration(
        time=T4, source=2, destination=3, proportion=1)

    return [din_to_ngs, din_to_bsj, chw_bottleneck, bsj_to_chw]
Beispiel #8
0
 def __init__(self, N0, growth_rate, *args):
     """Set up a single population with an arbitrary series of epochs.

     Args:
         N0: Initial size of the population.
         growth_rate: Initial growth rate of the population.
         *args: ``(time, initial_size, growth_rate)`` triples, each turned
             into one ``PopulationParametersChange`` for population 0.
     """
     present_day = msprime.PopulationConfiguration(
         initial_size=N0,
         growth_rate=growth_rate,
         metadata=self.populations[0].asdict())
     self.population_configurations = [present_day]
     self.migration_matrix = [[0]]
     self.demographic_events = []
     self.demographic_events.extend(
         msprime.PopulationParametersChange(time=when,
                                            initial_size=size,
                                            growth_rate=rate,
                                            population_id=0)
         for when, size, rate in args)
Beispiel #9
0
def HuberThreeEpoch():
    """Return a single-population, three-epoch ``stdpopsim.DemographicModel``.

    The population has size ``N_3`` at present, ``N_2`` from time ``T_3``
    and ``N_ANC`` from time ``T_2 + T_3``.  The same identifier string is
    used for the model id, description and long description.
    """
    model_id = "QC-African3Epoch_1H18"
    atlas = stdpopsim.Population(id="SouthMiddleAtlas", description="A. thalina")
    populations = [atlas]

    # Epoch durations.
    T_2 = 7420
    T_3 = 14534
    # Population sizes.
    N_ANC = 161744
    N_2 = 24076
    N_3 = 203077

    present_day = msprime.PopulationConfiguration(
        initial_size=N_3, metadata=populations[0].asdict())
    to_second_epoch = msprime.PopulationParametersChange(
        time=T_3, initial_size=N_2, population_id=0)
    to_ancestral = msprime.PopulationParametersChange(
        time=T_2 + T_3, initial_size=N_ANC, population_id=0)

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=populations,
        population_configurations=[present_day],
        demographic_events=[to_second_epoch, to_ancestral],
        # The same mapping object is repeated three times.
        population_id_map=[{"SouthMiddleAtlas": 0}] * 3,
    )
Beispiel #10
0
 def test_different_types(self):
     """Events of different classes never compare equal, in either order."""
     one_of_each = [
         msprime.PopulationParametersChange(time=1, initial_size=1),
         msprime.MigrationRateChange(time=1, rate=1),
         msprime.MassMigration(time=1, source=1),
         msprime.SimpleBottleneck(time=1),
     ]
     for first, second in itertools.combinations(one_of_each, 2):
         # Different classes: unequal both ways round.
         for left, right in ((first, second), (second, first)):
             self.assertFalse(
                 models.demographic_events_equal([left], [right], 1))
         # Every event equals itself.
         for same in (first, second):
             self.assertTrue(
                 models.demographic_events_equal([same], [same], 1))
         # The verifying variant raises instead of returning False.
         for left, right in ((second, first), (first, second)):
             with self.assertRaises(models.UnequalModelsError):
                 models.verify_demographic_events_equal([left], [right], 1)
Beispiel #11
0
 def test_bad_extended_events(self):
     """The slim engine raises ValueError for each invalid extended event."""
     slim = stdpopsim.get_engine("slim")
     invalid_events = (
         msprime.PopulationParametersChange(time=0, initial_size=100),
         None,
         {},
         "",
     )
     for candidate in invalid_events:
         with self.assertRaises(ValueError):
             slim.simulate(
                 demographic_model=self.model,
                 contig=self.contig,
                 samples=self.samples,
                 extended_events=[candidate],
                 dry_run=True,
             )
Beispiel #12
0
    def __init__(self):
        """Set up a single population whose size alternates between N0 and N0/10.

        Five epochs with their own growth rates are followed by a final
        epoch of constant size.  Sets ``generation_time``,
        ``population_configurations``, ``migration_matrix`` and
        ``demographic_events``.
        """
        super().__init__()

        self.generation_time = 29
        N0 = 14312

        # (start time, size at start, growth rate) per epoch, most recent first.
        growth_epochs = [
            (33.333, N0, 0.023025),
            (133.33, N0 / 10, -0.005756),
            (533.33, N0, 0.0014391),
            (2133.33, N0 / 10, -0.00035977),
            (8533.33, N0, 8.99448e-5),
        ]
        ancient_start = 34133.31
        ancient_size = N0 / 10

        self.population_configurations = [
            msprime.PopulationConfiguration(initial_size=N0)
        ]

        self.migration_matrix = [[0]]

        events = [
            msprime.PopulationParametersChange(initial_size=size,
                                               time=start,
                                               growth_rate=rate)
            for start, size, rate in growth_epochs
        ]
        # The oldest epoch has constant size.
        events.append(
            msprime.PopulationParametersChange(time=ancient_start,
                                               initial_size=ancient_size,
                                               growth_rate=0))
        self.demographic_events = events
Beispiel #13
0
    def simulate(self, **kwargs) -> "tskit.TreeSequence":
        """Run msprime under the size history stored on this object.

        Each time in ``self.t`` is paired with the corresponding entry of
        ``self.Ne[:-1]`` to form a size-change event.  Any caller-supplied
        ``demographic_events`` keyword is overwritten.

        Args:
            **kwargs: Passed through to msprime.simulate().

        Returns:
            Tree sequence containing simulated data.
        """
        import msprime as msp

        size_changes = []
        for when, size in zip(self.t, self.Ne[:-1]):
            size_changes.append(
                msp.PopulationParametersChange(time=when, initial_size=size)
            )
        kwargs["demographic_events"] = size_changes
        return msp.simulate(**kwargs)
Beispiel #14
0
def simulate(args):
    """Simulate replicates of a two-population pulse-admixture history.

    Args:
        args: Object providing ``num_chroms``, ``Ne``, ``admixture_time``,
            ``admixture_prop``, ``model`` and ``replicates``.

    Returns:
        The replicates returned by ``msprime.simulate``, with migrations
        recorded.
    """
    rho = 1e-8
    all_lengths = [
        247249719, 242951149, 199501827, 191273063, 180857866, 170899992,
        158821424, 146274826, 140273252, 135374737, 134452384, 132349534,
        114142980, 106368585, 100338915, 88827254, 78774742, 76117153,
        63811651, 62435964, 46944323, 49691432
    ]
    chrom_lengths = all_lengths[:args.num_chroms]

    positions, rates = get_positions_rates(chrom_lengths, rho)
    recombination_map = msprime.RecombinationMap(
        positions, rates, num_loci=chrom_lengths[-1] + 1)

    # All samples are drawn from population 0; population 1 is unsampled.
    sampled = msprime.PopulationConfiguration(
        sample_size=args.Ne, initial_size=args.Ne)
    unsampled = msprime.PopulationConfiguration(
        sample_size=0, initial_size=args.Ne)

    pulse_time = args.admixture_time
    # A fraction of population 0 moves to population 1 at the pulse.
    pulse = msprime.MassMigration(
        time=pulse_time, source=0, dest=1, proportion=args.admixture_prop)
    # One generation earlier all of population 1 moves into population 0.
    merge = msprime.MassMigration(
        time=pulse_time + 1, source=1, dest=0, proportion=1)
    # One generation earlier still, population 0 is set to size 1.0.
    collapse = msprime.PopulationParametersChange(
        time=pulse_time + 2, initial_size=1.0, population_id=0)

    return msprime.simulate(
        recombination_map=recombination_map,
        demographic_events=[pulse, merge, collapse],
        population_configurations=[sampled, unsampled],
        model=args.model,
        record_migrations=True,
        num_replicates=args.replicates)
def sim_chrom(n_chrom,
              chrom_len,
              recombination_rate,
              mutation_rate,
              sample_size,
              Ne_ancestral,
              Ne_recent,
              Ne_switchdate,
              print_history=False):
    """Simulate chromosomes under a single size change.

    The population has size ``Ne_recent`` until ``Ne_switchdate`` and size
    ``Ne_ancestral`` before that.

    Args:
        n_chrom: Forwarded to ``make_rec_map``.
        chrom_len: Forwarded to ``make_rec_map``.
        recombination_rate: Forwarded to ``make_rec_map``.
        mutation_rate: Mutation rate passed to ``msprime.simulate``.
        sample_size: Sample size passed to ``msprime.simulate``.
        Ne_ancestral: Size of population 0 from ``Ne_switchdate`` backwards.
        Ne_recent: Size passed to ``msprime.simulate`` as ``Ne``.
        Ne_switchdate: Time of the size change.
        print_history: Accepted for interface compatibility; currently unused.

    Returns:
        The simulated tree sequence.
    """
    # recombination map
    rec_map = make_rec_map(n_chrom, chrom_len, recombination_rate)

    demographic_events = [
        msprime.PopulationParametersChange(time=Ne_switchdate,
                                           initial_size=Ne_ancestral,
                                           population_id=0)
    ]
    tree_sequence = msprime.simulate(
        sample_size=sample_size,
        Ne=Ne_recent,
        recombination_map=rec_map,
        mutation_rate=mutation_rate,
        demographic_events=demographic_events,
    )
    shape = tree_sequence.get_num_mutations(), tree_sequence.get_sample_size()
    # Function-call form: the bare print statement is a SyntaxError on
    # Python 3, while this single-argument call works on both 2 and 3.
    print('num_mutations: {}, samples: {}'.format(*shape))
    return tree_sequence
Beispiel #16
0
 def test_single_growth_rate_size_change(self):
     """A growing population followed by one size change yields two epochs."""
     # All values are in generations and absolute sizes.
     Ne = 1000
     rate = -0.01
     switch_time = 20
     size_at_switch = Ne * math.exp(-rate * switch_time)
     size_after_switch = 4 * Ne

     simulator = msprime.simulator_factory(
         Ne=Ne,
         population_configurations=[
             msprime.PopulationConfiguration(sample_size=2,
                                             initial_size=Ne,
                                             growth_rate=rate)
         ],
         demographic_events=[
             msprime.PopulationParametersChange(time=switch_time,
                                                initial_size=size_after_switch,
                                                growth_rate=0)
         ])
     ll_sim = simulator.create_ll_instance()

     def current_populations():
         # Population objects for the epoch the low-level simulator is in.
         return [
             msprime.Population(Ne=Ne, **config)
             for config in ll_sim.get_population_configuration()
         ]

     # First epoch: ends at the size change.
     epoch_end = ll_sim.debug_demography()
     self.assertEqual(switch_time, epoch_end * 4 * Ne)
     first_epoch = current_populations()
     self.assertEqual(len(first_epoch), 1)
     only_pop = first_epoch[0]
     self.assertEqual(only_pop.growth_rate, rate)
     self.assertEqual(only_pop.initial_size, Ne)
     self.assertEqual(only_pop.get_size(switch_time), size_at_switch)

     # Second epoch: constant size, never ends.
     epoch_end = ll_sim.debug_demography()
     self.assertTrue(math.isinf(epoch_end))
     only_pop = current_populations()[0]
     self.assertEqual(only_pop.growth_rate, 0)
     self.assertEqual(only_pop.initial_size, size_after_switch)
     self.assertEqual(only_pop.get_size(10), size_after_switch)
Beispiel #17
0
    def _set_demography(self):
        """Read ``self.demo_file`` and build the population size history.

        Each non-blank line is split on whitespace: field 1 is a population
        size and field 2 is either an epoch duration or the literal ``inf``.
        Durations are accumulated into absolute times, and every finite
        epoch contributes a ``PopulationParametersChange`` at its
        accumulated end time with that line's size.  The size of the first
        finite epoch becomes the initial population size.

        Sets ``self.pop_config`` and ``self.demography``.
        """
        demography = []
        N0 = None
        t = 0
        # Read-only mode is sufficient, and the context manager guarantees
        # the handle is closed even if parsing fails.
        with open(self.demo_file, "r") as demo:
            for line in demo:
                spltln = line.split()
                if not spltln:
                    # Tolerate blank lines instead of failing on the index.
                    continue
                Nt = int(spltln[1])
                if spltln[2] == "inf":
                    continue
                if N0 is None:
                    # First finite epoch: its size is the present-day size.
                    N0 = Nt
                t += int(spltln[2])
                demography.append(
                    msp.PopulationParametersChange(time=t, initial_size=Nt))

        self.pop_config = [msp.PopulationConfiguration(initial_size=N0)]
        self.demography = demography
Beispiel #18
0
def finding_nearest_neighbors():
    """Simulate three samples taken at different times and draw the first tree.

    The tree is written to ``_static/different_time_samples.svg``.
    """
    # All samples come from population 0, at times 0, 1 and 20.
    sampling_times = (0, 1, 20)
    samples = [msprime.Sample(0, when) for when in sampling_times]

    growth_change = msprime.PopulationParametersChange(
        time=10, growth_rate=2, population_id=0)
    ts = msprime.simulate(
        Ne=1e6,
        samples=samples,
        demographic_events=[growth_change],
        random_seed=42,
    )

    first_tree = ts.first()
    first_tree.draw_svg("_static/different_time_samples.svg", time_scale="rank")
def migration_example(num_replicates=1):
    """Simulate one sample per deme under a migration matrix read from file.

    Population sizes follow a sinusoid over the first 100 time units.  The
    demographic history is printed before simulating.

    Args:
        num_replicates: Number of independent replicates to simulate.
    """
    M = read_migration_matrix("nussear_8x.migr.tsv")
    # M = read_migration_matrix("nussear.migr.tsv")
    assert (M.shape[0] == M.shape[1])
    num_demes = M.shape[0]

    # sample one individual per deme
    pop_configs = []
    for _ in range(num_demes):
        pop_configs.append(msprime.PopulationConfiguration(sample_size=1))

    # suppose pop size changes have been sinusoidal
    size_changes = []
    for t in range(100):
        factor = 1.0 + 0.5 * np.sin(2 * np.pi * t / 20)
        size_changes.append(
            msprime.PopulationParametersChange(time=t,
                                               initial_size=factor,
                                               growth_rate=0))

    # Print the demographic history that was just described.
    msprime.DemographyDebugger(
        population_configurations=pop_configs,
        migration_matrix=M,
        demographic_events=size_changes).print_history()

    # run the simulation
    replicates = msprime.simulate(
        population_configurations=pop_configs,
        demographic_events=size_changes,
        migration_matrix=M,
        num_replicates=num_replicates)

    # Root time of the first tree of every replicate, divided by 4.
    T = np.zeros(num_replicates)
    for i, tree_sequence in enumerate(replicates):
        first_tree = tree_sequence.first()
        T[i] = first_tree.time(first_tree.root) / 4
Beispiel #20
0
def main():
    """Simulate windows under a one-step size change and print block spectra.

    Command-line arguments (all integers, in order):
        1. ``blockL``: passed to ``makeblock_ali``.
        2. ``winL``: length of each simulated window.
        3. ``num_replicates``: number of windows to simulate.
        4. ``nsub``: size of each subsample.
        5. ``nwhole``: number of samples simulated per window.

    The result of ``multibSFSall`` is printed with every square bracket and
    parenthesis replaced by a curly brace.
    """
    blockL = int(sys.argv[1])
    winL = int(sys.argv[2])
    # num_replicates is the number of windows
    num_replicates = int(sys.argv[3])
    # nsub is the size of each subsample
    nsub = int(sys.argv[4])
    # nwhole is the size of the whole sample
    nwhole = int(sys.argv[5])

    # Demographic parameters specifying a step change in Ne to Ne/scal in a
    # single population at time T_bott.
    recr = 0
    scal = 0.5
    Ne = 4.05596e5
    T_bott = 2e5
    mu = 3.46e-9
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=nwhole, initial_size=Ne)
    ]
    demographic_events = [
        msprime.PopulationParametersChange(time=T_bott,
                                           initial_size=Ne / scal,
                                           population_id=0)
    ]
    # combos lists positions of subsampled inds:
    combos = list(itertools.combinations(range(nwhole), nsub))
    # this generates the actual replicates
    replicates = msprime.simulate(
        num_replicates=num_replicates,
        length=winL,
        recombination_rate=recr,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        mutation_rate=mu)

    myBlockDict = makeblock_ali(replicates, blockL, winL)
    out = multibSFSall(myBlockDict, combos, nsub)
    # A leftover breakpoint() used to stop here and drop every run into the
    # debugger; it has been removed so the script runs unattended.
    # One pass turns every list/tuple delimiter into a curly brace.
    outmath = str(out).translate(str.maketrans("[]()", "{}{}"))
    print(outmath)
Beispiel #21
0
    def __init__(self):
        """Set up a single-population model with two constant-size epochs.

        Populates ``population_configurations``, ``migration_matrix`` and
        ``demographic_events``.
        """
        # Present-day size, and the ancestral size that applies from
        # ``epoch_change`` backwards.
        size_present = 100218
        size_ancestral = 746148
        epoch_change = 568344

        present_day = msprime.PopulationConfiguration(
            initial_size=size_present, metadata=self.populations[0].asdict())
        self.population_configurations = [present_day]

        self.migration_matrix = [[0]]

        to_ancestral = msprime.PopulationParametersChange(
            time=epoch_change, initial_size=size_ancestral, population_id=0)
        self.demographic_events = [to_ancestral]
Beispiel #22
0
def _afr_2epoch():
    """Return the two-epoch South Middle Atlas ``stdpopsim.DemographicModel``.

    The single population has size ``N_0`` at present and size ``N_A`` from
    time ``t_1`` backwards; the generation time is 1.
    """
    N_A = 746148
    N_0 = 100218
    t_1 = 568344

    atlas = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    populations = [atlas]
    huber_2018 = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )
    present_day = msprime.PopulationConfiguration(
        initial_size=N_0, metadata=populations[0].asdict()
    )
    to_ancestral = msprime.PopulationParametersChange(
        time=t_1, initial_size=N_A, population_id=0
    )

    return stdpopsim.DemographicModel(
        id="African2Epoch_1H18",
        description="South Middle Atlas African two epoch model",
        long_description="""
            Model estimated from site frequency spectrum of synonymous
            SNPs from African South Middle Atlas samples using
            Williamson et al. 2005 methodology. Values come from supplementary
            table 1 of Huber et al 2018. Sizes change from N_A -> N_0 and t_1 is
            time of the second epoch.
        """,
        populations=populations,
        citations=[huber_2018],
        generation_time=1,
        population_configurations=[present_day],
        demographic_events=[to_ancestral],
    )
    def __init__(self):
        """Set up a two-population split-with-migration model.

        Each population changes size exponentially after the split.  The
        source assumed a generation time of 20 years and a mutation rate of
        2e-8 per bp per generation.  Populates
        ``population_configurations``, ``migration_matrix`` and
        ``demographic_events``.
        """
        generation_time = 20

        # Parameters given in Table S21-2
        Ne = 17934
        s = 0.592
        # Sizes immediately after the split: the ancestral size divided by s.
        NB0 = s * Ne
        NS0 = (1 - s) * Ne
        # Present-day sizes.
        NBF = 8805
        NSF = 37661
        mSB = 0.395 / 2 / Ne
        mBS = 0.239 / 2 / Ne
        # Split time, converted from years to generations.
        T = 403149 / generation_time

        # Rates that take each population from its split size to its final size.
        rB = np.log(NBF / NB0) / T
        rS = np.log(NSF / NS0) / T

        # pop 0 is Bornean, pop 1 is Sumatran
        bornean = msprime.PopulationConfiguration(initial_size=NBF, growth_rate=rB)
        sumatran = msprime.PopulationConfiguration(initial_size=NSF, growth_rate=rS)
        self.population_configurations = [bornean, sumatran]

        self.migration_matrix = [[0, mBS], [mSB, 0]]

        # At the split: merge, turn off migration, change size and growth rate.
        merge = msprime.MassMigration(source=1,
                                      destination=0,
                                      time=T,
                                      proportion=1)
        stop_migration = msprime.MigrationRateChange(time=T, rate=0)
        ancestral = msprime.PopulationParametersChange(time=T,
                                                       initial_size=Ne,
                                                       growth_rate=0,
                                                       population_id=0)
        self.demographic_events = [merge, stop_migration, ancestral]
Beispiel #24
0
    def _set_demography(self, scale=1.0):
        """Read ``self.demo_file`` and build a scaled population size history.

        Each non-blank line is split on whitespace: field 1 is a population
        size and field 2 is either an epoch duration or the literal ``inf``.
        Durations are accumulated into absolute times, and every finite
        epoch contributes a ``PopulationParametersChange`` at its
        accumulated end time with that line's scaled size.  The scaled size
        of the first finite epoch becomes the initial population size.

        Sets ``self.pop_config`` and ``self.demography``.

        Args:
            scale: Factor applied once to every population size read from
                the file.
        """
        demography = []
        N0 = None
        t = 0
        # Read-only mode is sufficient, and the context manager guarantees
        # the handle is closed even if parsing fails.
        with open(self.demo_file, "r") as demo:
            for line in demo:
                spltln = line.split()
                if not spltln:
                    # Tolerate blank lines instead of failing on the index.
                    continue
                Nt = int(spltln[1]) * scale
                if spltln[2] == "inf":
                    continue
                if N0 is None:
                    # Nt is already scaled; multiplying by ``scale`` again
                    # made the initial size inconsistent with the events.
                    N0 = Nt
                t += int(spltln[2])
                demography.append(
                    msp.PopulationParametersChange(time=t, initial_size=Nt)
                )

        # Setting the population configurations / demography
        self.pop_config = [msp.PopulationConfiguration(initial_size=N0)]
        self.demography = demography
Beispiel #25
0
 def exp_decline(self, N0=100, N1=1000, T=1000):
     """
     Build a one-population model whose size changes exponentially.

     The population has size N0 at present with growth rate
     log(N0 / N1) / T, and is set to size N1 with growth rate 0 at time T.
     Used for testing that growth rates are handled appropriately.
     """
     rate = math.log(N0 / N1) / T
     pop = stdpopsim.models._pop0
     present_day = msprime.PopulationConfiguration(
         initial_size=N0, growth_rate=rate, metadata=pop.asdict())
     stop_growth = msprime.PopulationParametersChange(
         time=T, initial_size=N1, growth_rate=0, population_id=0)
     return stdpopsim.DemographicModel(
         id="exp_decline",
         description="exp_decline",
         long_description="exp_decline",
         populations=[pop],
         generation_time=1,
         population_configurations=[present_day],
         demographic_events=[stop_growth],
     )
    def __init__(self):
        """Set up a single-population model with piecewise-constant sizes.

        Stores the epoch start times and sizes as arrays, flattens both ends
        of the size trajectory, and creates one size-change event per epoch.
        Populates ``times``, ``sizes``, ``generation_time``,
        ``demographic_events``, ``migration_matrix`` and
        ``population_configurations``.
        """
        # The size during the interval times[k] to times[k+1] is sizes[k].
        epoch_starts = [
            699, 2796, 6068, 9894, 14370, 19606, 25730, 32894, 41275, 51077,
            62544, 75958, 91648, 110001, 131471, 156584, 185960, 220324,
            260520, 307540, 362541, 426879, 502139, 590173, 693151, 813610,
            954517, 1119341, 1312147, 1537686, 1801500, 2110100
        ]
        epoch_sizes = [
            42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
            78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813, 111644,
            143456, 181571, 217331, 241400, 246984, 238593, 228222, 217752,
            198019, 165210, 121796, 121796, 73989, 73989, 73989
        ]
        self.times = np.array(epoch_starts)
        self.sizes = np.array(epoch_sizes)

        # MSMC is accurate from 40Kya-1.6Mya for A.thaliana(Durvasula et al 2017).
        # Entries 0-7 are overwritten with the size at index 8 (~40Kya) ...
        self.sizes[:8] = self.sizes[8]
        # ... and entries 30-31 with the size at index 30 (~1.6Mya).
        self.sizes[30:32] = self.sizes[30]

        # generation time is 1 year
        self.generation_time = default_generation_time

        # One size change per epoch start, all on population 0.
        self.demographic_events = [
            msprime.PopulationParametersChange(
                time=start, initial_size=size, population_id=0)
            for start, size in zip(self.times, self.sizes)
        ]

        self.migration_matrix = [[0]]

        present_day = msprime.PopulationConfiguration(initial_size=self.sizes[0])
        self.population_configurations = [present_day]
Beispiel #27
0
def ooa_3(N_A=7300, N_B=2100, N_AF=12300, N_EU0=1000, N_AS0=510, r_EU=0.004,
          r_AS=0.0055, T_AF=8800, T_B=5600, T_EU_AS=848, m_AF_B=25e-5,
          m_AF_EU=3e-5, m_AF_AS=1.9e-5, m_EU_AS=9.6e-5):
    """Build the three-population Out-of-Africa model ("OutOfAfrica_3G09").

    The defaults are described in the model text as the maximum likelihood
    values from Table 1 of Gutenkunst et al. 2009
    (https://doi.org/10.1371/journal.pgen.1000695). Citations are not
    attached to the returned model.

    Population sizes:
        N_A: size given to population 0 at time ``T_AF``.
        N_B: size given to population 1 at time ``T_EU_AS`` (after
            population 2 has been merged into it).
        N_AF: initial size of population 0.
        N_EU0, r_EU: the initial size of population 1 is
            ``N_EU0 / exp(-r_EU * T_EU_AS)``, with growth rate ``r_EU``.
        N_AS0, r_AS: the initial size of population 2 is
            ``N_AS0 / exp(-r_AS * T_EU_AS)``, with growth rate ``r_AS``.

    Event times (used directly as msprime event times; no unit conversion is
    performed in this function):
        T_AF: time of the size change of population 0 to ``N_A``.
        T_B: time at which population 1 is merged into population 0.
        T_EU_AS: time at which population 2 is merged into population 1.

    Migration rates:
        m_AF_EU, m_AF_AS, m_EU_AS: symmetric entries of the initial
            migration matrix between populations 0/1, 0/2 and 1/2.
        m_AF_B: symmetric rate between populations 0 and 1 from
            ``T_EU_AS`` until ``T_B``.

    Returns the ``models.DemographicModel`` describing this history.
    """
    model_id = "OutOfAfrica_3G09"
    description = "Three population out-of-Africa"
    long_description = """
        The three population Out-of-Africa model from Gutenkunst et al. 2009.
        It describes the ancestral human population in Africa, the out of Africa
        event, and the subsequent European-Asian population split.
        Model parameters are the maximum likelihood values of the
        various parameters given in Table 1 of Gutenkunst et al.
    """
    populations = [_yri_population, _ceu_population, _chb_population]
    generation_time = 25

    # Work out the starting (diploid) sizes of the two growing populations
    # from their sizes at the split and their growth rates.
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)

    # Population IDs are the indexes into this list, so initially
    # 0=YRI, 1=CEU and 2=CHB.
    population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=N_AF, metadata=populations[0].asdict()),
        msprime.PopulationConfiguration(
            initial_size=N_EU, growth_rate=r_EU,
            metadata=populations[1].asdict()),
        msprime.PopulationConfiguration(
            initial_size=N_AS, growth_rate=r_AS,
            metadata=populations[2].asdict()),
    ]

    migration_matrix = [
        [0, m_AF_EU, m_AF_AS],
        [m_AF_EU, 0, m_EU_AS],
        [m_AF_AS, m_EU_AS, 0],
    ]

    # CEU and CHB merge into B at T_EU_AS: all migration is reset, then the
    # symmetric AF<->B rate is switched on and B gets a constant size.
    eu_as_merge = [
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
    ]
    # Population B merges into YRI at T_B and all migration stops.
    b_merge = [
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        msprime.MigrationRateChange(time=T_B, rate=0),
    ]
    # Size changes to N_A at T_AF.
    ancestral_resize = [
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]

    return models.DemographicModel(
        id=model_id,
        description=description,
        long_description=long_description,
        populations=populations,
        generation_time=generation_time,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=eu_as_merge + b_merge + ancestral_resize,
    )
Beispiel #28
0
def _sma_1pop():
    """Return the "SouthMiddleAtlas_1D17" single-population model.

    The model is a piecewise-constant size history for one population
    (population 0), expressed as one ``PopulationParametersChange`` per epoch
    boundary, with a generation time of 1.
    """
    # Epoch boundaries: epoch_sizes[k] is the size that applies during the
    # interval from epoch_times[k] to epoch_times[k+1].
    epoch_times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100,
    ])
    epoch_sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989,
    ])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al
    # 2017), so the estimates outside that window are flattened:
    #  * entries 0..7 are overwritten with the entry at index 8 (~40Kya);
    #  * entries 30..31 are overwritten with the entry at index 30 (~1.6Mya).
    epoch_sizes[:8] = epoch_sizes[8]
    epoch_sizes[30:32] = epoch_sizes[30]

    # One size change per epoch boundary, all applied to population 0.
    size_changes = [
        msprime.PopulationParametersChange(
            time=boundary, initial_size=size, population_id=0
        )
        for size, boundary in zip(epoch_sizes, epoch_times)
    ]

    atlas_population = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    populations = [atlas_population]

    source_citation = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    # The present-day size is the (flattened) size of the first epoch.
    present_day = msprime.PopulationConfiguration(
        initial_size=epoch_sizes[0], metadata=populations[0].asdict()
    )

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description="""
            This model comes from MSMC using two randomly sampled homozygous
            individuals (Khe32 and Ifr4) from the South Middle Atlas region
            from the Middle Atlas Mountains in Morocco. The model is estimated
            with 32 time periods. Because estimates from the recent and ancient
            past are less accurate, we set the population size in the first 7
            time periods equal to the size at the 8th time period and the size
            during last 2 time periods equal to the size in the 30th time
            period.
        """,
        populations=populations,
        citations=[source_citation],
        generation_time=1,
        demographic_events=size_changes,
        population_configurations=[present_day],
    )
Beispiel #29
0
def create_simulation_runner(parser, arg_list):
    """
    Parses the arguments and returns a SimulationRunner instance.

    The parsed options are validated and translated into an
    ``msprime.Demography`` (populations, migration matrix and demographic
    events) plus per-locus recombination, mutation and gene conversion rates,
    which are then handed to ``SimulationRunner``.

    :param parser: The argument parser used to parse ``arg_list``. Every
        validation failure is reported through ``parser.error``; this code
        presumably relies on that call not returning (as with
        ``argparse.ArgumentParser.error``) -- confirm against the parser
        actually passed in.
    :param arg_list: The list of command line arguments to parse.
    :return: The configured SimulationRunner.
    """
    args = parser.parse_args(arg_list)
    if args.mutation_rate == 0 and not args.trees:
        parser.error("Need to specify at least one of --theta or --trees")
    # The recombination option is a (rate, num_loci) pair; the number of
    # loci must be a whole number.
    num_loci = int(args.recombination[1])
    if args.recombination[1] != num_loci:
        parser.error("Number of loci must be integer value")
    if args.recombination[0] != 0.0 and num_loci < 2:
        parser.error("Number of loci must > 1")
    r = 0.0
    # We don't scale recombination or mutation rates by the size
    # of the region. The recombination value is spread over the
    # num_loci - 1 gaps between adjacent loci and the mutation value is
    # spread over the num_loci loci.
    if num_loci > 1:
        r = args.recombination[0] / (num_loci - 1)
    mu = args.mutation_rate / num_loci

    # ms uses a ratio to define the GC rate, but if the recombination rate
    # is zero we define the gc rate directly.
    gc_param, gc_tract_length = args.gene_conversion
    gc_rate = 0
    if r == 0.0:
        if num_loci > 1:
            gc_rate = gc_param / (num_loci - 1)
    else:
        gc_rate = r * gc_param

    # Default: a single isolated population of size 1 holding all samples.
    demography = msprime.Demography.isolated_model([1])
    # Check the structure format.
    symmetric_migration_rate = 0.0
    num_populations = 1
    migration_matrix = [[0.0]]
    num_samples = [args.sample_size]
    if args.structure is not None:
        # The structure option is: num_populations, then one sample size
        # per population, then optionally an overall migration rate.
        num_populations = convert_int(args.structure[0], parser)
        # We must have at least num_population sample_configurations
        if len(args.structure) < num_populations + 1:
            parser.error("Must have num_populations sample sizes")
        demography = msprime.Demography.isolated_model([1] * num_populations)
        num_samples = [0] * num_populations
        for j in range(num_populations):
            num_samples[j] = convert_int(args.structure[j + 1], parser)
        if sum(num_samples) != args.sample_size:
            parser.error("Population sample sizes must sum to sample_size")
        # We optionally have the overall migration_rate here
        if len(args.structure) == num_populations + 2:
            symmetric_migration_rate = convert_float(
                args.structure[num_populations + 1], parser
            )
            check_migration_rate(parser, symmetric_migration_rate)
        elif len(args.structure) > num_populations + 2:
            parser.error("Too many arguments to --structure/-I")
        if num_populations > 1:
            # Every off-diagonal entry gets an equal share of the overall
            # rate; the int(j != k) factor zeroes the diagonal.
            migration_matrix = [
                [
                    symmetric_migration_rate / (num_populations - 1) * int(j != k)
                    for j in range(num_populations)
                ]
                for k in range(num_populations)
            ]
    else:
        # Without a structure option there is only one population, so any
        # migration matrix option is an error.
        if len(args.migration_matrix_entry) > 0:
            parser.error(
                "Cannot specify migration matrix entries without "
                "first providing a -I option"
            )
        if args.migration_matrix is not None:
            parser.error(
                "Cannot specify a migration matrix without "
                "first providing a -I option"
            )
    # A full migration matrix replaces the symmetric one built above;
    # individual entries are then applied on top of whichever is in effect.
    if args.migration_matrix is not None:
        migration_matrix = convert_migration_matrix(
            parser, args.migration_matrix, num_populations
        )
    for matrix_entry in args.migration_matrix_entry:
        pop_i = convert_population_id(parser, matrix_entry[0], num_populations)
        pop_j = convert_population_id(parser, matrix_entry[1], num_populations)
        rate = matrix_entry[2]
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        check_migration_rate(parser, rate)
        migration_matrix[pop_i][pop_j] = rate

    # Set the initial demography. The global growth rate is applied first so
    # that the per-population growth rates below can override it.
    if args.growth_rate is not None:
        for population in demography.populations:
            population.growth_rate = args.growth_rate
    for population_id, growth_rate in args.population_growth_rate:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].growth_rate = growth_rate
    for population_id, size in args.population_size:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].initial_size = size

    # Events are collected as (index, event) pairs; the index is used below
    # to order them and to check that they were given in time order.
    demographic_events = []
    # First we look at population split events. We do this differently
    # to ms, as msprime requires a fixed number of populations. Therefore,
    # modify the number of populations to take into account population
    # splits. This is a messy hack, and will probably need to be changed.
    for index, (t, population_id, proportion) in args.admixture:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        if proportion < 0 or proportion > 1:
            parser.error("Proportion value must be 0 <= p <= 1.")
        # In ms, the probability of staying in source is p and the probability
        # of moving to the new population is 1 - p.
        # The destination is the population about to be created, whose id is
        # the current value of num_populations.
        event = (index, msprime.MassMigration(t, pid, num_populations, 1 - proportion))
        demographic_events.append(event)

        num_populations += 1
        # We add another element to each row in the migration matrix
        # along with another row. All new entries are zero.
        for row in migration_matrix:
            row.append(0)
        migration_matrix.append([0 for j in range(num_populations)])
        demography.populations.append(msprime.Population(initial_size=1))
        num_samples.append(0)

    # Add the demographic events. From here on num_populations includes any
    # populations created by admixture events above. The options that act
    # on all populations at once (-eG, -eN, -eM, -ema) are rejected when any
    # admixture event is present.
    for index, (t, alpha) in args.growth_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eG")
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, growth_rate=alpha))
        )
    for index, (t, population_id, alpha) in args.population_growth_rate_change:
        pid = convert_population_id(parser, population_id, num_populations)
        check_event_time(parser, t)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, growth_rate=alpha, population_id=pid
                ),
            )
        )
    for index, (t, x) in args.size_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eN")
        check_event_time(parser, t)
        # A size change also resets the growth rate to zero.
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0
                ),
            )
        )
    for index, (t, population_id, x) in args.population_size_change:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0, population_id=pid
                ),
            )
        )
    for index, (t, pop_i, pop_j) in args.population_split:
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, pop_i, num_populations)
        pop_j = convert_population_id(parser, pop_j, num_populations)
        # All lineages in pop_i are moved to pop_j.
        demographic_events.append((index, msprime.MassMigration(t, pop_i, pop_j, 1.0)))
        # Migration rates from subpopulation i (M[k, i], k != i) are set to zero.
        for k in range(num_populations):
            if k != pop_i:
                event = msprime.MigrationRateChange(t, 0.0, matrix_index=(k, pop_i))
                demographic_events.append((index, event))

    # Demographic events that affect the migration matrix
    if num_populations == 1:
        condition = (
            len(args.migration_rate_change) > 0
            or len(args.migration_matrix_entry_change) > 0
            or len(args.migration_matrix_change) > 0
        )
        if condition:
            parser.error("Cannot change migration rates for 1 population")
    for index, (t, x) in args.migration_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eM")
        check_migration_rate(parser, x)
        check_event_time(parser, t)
        # No matrix_index is given, so the rate change is not restricted to
        # one entry; the overall rate is shared out as in the initial
        # symmetric matrix. The division is safe only because the
        # single-population case was rejected just above.
        event = msprime.MigrationRateChange(t, x / (num_populations - 1))
        demographic_events.append((index, event))
    for index, event in args.migration_matrix_entry_change:
        # Each event is (time, pop_i, pop_j, rate).
        t = event[0]
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, event[1], num_populations)
        pop_j = convert_population_id(parser, event[2], num_populations)
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        rate = event[3]
        check_migration_rate(parser, rate)
        msp_event = msprime.MigrationRateChange(t, rate, matrix_index=(pop_i, pop_j))
        demographic_events.append((index, msp_event))
    for index, event in args.migration_matrix_change:
        # Each event is: time, num_populations, then the matrix entries.
        if len(event) < 3:
            parser.error("Need at least three arguments to -ma")
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-ema")
        t = convert_float(event[0], parser)
        check_event_time(parser, t)
        if convert_int(event[1], parser) != num_populations:
            parser.error("num_populations must be equal for new migration matrix")
        matrix = convert_migration_matrix(parser, event[2:], num_populations)
        # The new matrix is applied as one rate change per off-diagonal entry.
        for j in range(num_populations):
            for k in range(num_populations):
                if j != k:
                    msp_event = msprime.MigrationRateChange(
                        t, matrix[j][k], matrix_index=(j, k)
                    )
                    demographic_events.append((index, msp_event))

    # Order the events by their index (ties broken by time), then check that
    # this ordering is also non-decreasing in time. sorted() is stable, so
    # the two lists differ only if a higher-index event has an earlier time.
    demographic_events.sort(key=lambda x: (x[0], x[1].time))
    time_sorted = sorted(demographic_events, key=lambda x: x[1].time)
    if demographic_events != time_sorted:
        parser.error("Demographic events must be supplied in non-decreasing time order")

    demography.events = [event for _, event in demographic_events]
    demography.migration_matrix = migration_matrix

    # Adjust the population sizes so that the timescales agree. In principle
    # we could correct this with a ploidy value=0.5, but what we have here
    # seems less awful.
    # Both the sizes set by events and the initial population sizes are
    # halved; events that only change the growth rate are left alone.
    for msp_event in demography.events:
        if isinstance(msp_event, msprime.PopulationParametersChange):
            if msp_event.initial_size is not None:
                msp_event.initial_size /= 2
    for j, pop in enumerate(demography.populations):
        pop.initial_size /= 2
        pop.name = f"pop_{j}"

    runner = SimulationRunner(
        num_samples,
        demography,
        num_loci=num_loci,
        num_replicates=args.num_replicates,
        recombination_rate=r,
        mutation_rate=mu,
        gene_conversion_rate=gc_rate,
        gene_conversion_tract_length=gc_tract_length,
        precision=args.precision,
        print_trees=args.trees,
        ms_random_seeds=args.random_seeds,
        hotspots=args.hotspots,
    )
    return runner
    msp.PopulationConfiguration(initial_size = Denisovasize), #3
    msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #4
    msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #5
    msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #6
]


demographic_events_dict = {


    # admixture times
    1551.06896552 + random.uniform(0,1)/100000.0: msp.MassMigration(time = 1551.06896552, source = 1, destination = 5,proportion = 0.02),
    Denisova_admix_time/gen_time + random.uniform(0,1)/100000.0: msp.MassMigration(time = Denisova_admix_time/gen_time, source = admix_into, destination = 2 ,proportion = DenisovaProportion),

    # Human parameters
    OutOfafrica - 300 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 300, initial_size = 1305, growth_rate = 0, population_id = 1),
    OutOfafrica - 200 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 200, initial_size = 5000, growth_rate = 0, population_id = 1),
    OutOfafrica - 100 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 100, initial_size = 250, growth_rate = 0, population_id = 1),
    OutOfafrica + random.uniform(0,1)/100000.0: msp.MassMigration(time = OutOfafrica, source = 1, destination = 0, proportion = 1.0),
    OutOfafrica + 0.0001 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica  + 0.0001, initial_size = Modernhumans, growth_rate = 0, population_id = 0),

    # archaic population merges parameters
    IntrogressingVindijaSplit/gen_time + random.uniform(0,1)/100000.0: msp.MassMigration(time = IntrogressingVindijaSplit/gen_time, source = 6, destination = 5, proportion = 1.0),
    130000/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = 130000/gen_time - 0.0001, source = 5, destination = 4, proportion = 1.0), 
    Denisova_split/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = Denisova_split/gen_time, source = 3, destination = 2, proportion = 1.0),
    DenisovaNeanderthal/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = DenisovaNeanderthal/gen_time, source = 4, destination = 2, proportion = 1.0),


    # psms pop sizes
    130000/gen_time + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = 130000/gen_time + 0.0001, initial_size = Neanderthal_latersize, growth_rate = 0, population_id = 4),