Exemple #1
0
 def test_single_growth_rate(self):
     # Everything below is expressed in generations and absolute sizes.
     initial_size = 1000
     alpha = -0.01
     change_time = 20
     expected_size = initial_size * math.exp(-alpha * change_time)
     config = msprime.PopulationConfiguration(
         sample_size=2, initial_size=initial_size, growth_rate=alpha)
     stop_growth = msprime.PopulationParametersChange(
         time=change_time, growth_rate=0)
     sim = msprime.simulator_factory(
         Ne=initial_size,
         population_configurations=[config],
         demographic_events=[stop_growth])
     ll_sim = sim.create_ll_instance()

     # First time slice: it must end exactly at the parameter change and
     # report the single population with its initial parameters.
     slice_end = ll_sim.debug_demography()
     self.assertEqual(change_time, slice_end)
     pops = []
     for params in ll_sim.get_population_configuration():
         pops.append(msprime.Population(**params))
     self.assertEqual(len(pops), 1)
     current = pops[0]
     self.assertEqual(current.growth_rate, alpha)
     self.assertEqual(current.initial_size, initial_size)
     self.assertEqual(current.get_size(change_time), expected_size)

     # Second time slice: no further events, so it never ends, and the
     # population stays at the size reached at the end of the first slice.
     slice_end = ll_sim.debug_demography()
     self.assertTrue(math.isinf(slice_end))
     pops = []
     for params in ll_sim.get_population_configuration():
         pops.append(msprime.Population(**params))
     current = pops[0]
     self.assertEqual(current.growth_rate, 0)
     self.assertEqual(current.initial_size, expected_size)
     self.assertEqual(current.get_size(10), expected_size)
Exemple #2
0
 def test_population(self):
     # One Population per combination of defaulted / explicit arguments;
     # all of them are handed to the repr round-trip assertion helper.
     default_pop = msprime.Population()
     sized_pop = msprime.Population(initial_size=2)
     growing_pop = msprime.Population(growth_rate=5)
     sized_growing_pop = msprime.Population(initial_size=234, growth_rate=10)
     self.assert_repr_round_trip(
         [default_pop, sized_pop, growing_pop, sized_growing_pop])
Exemple #3
0
 def test_symmetric_growth_rates(self):
     # Symmetric model: growth rate g for delta_t generations, then -g
     # for another delta_t, then zero. The size reached after the first
     # period must be undone by the second, returning to Ne.
     Ne = 10001
     growth_rate = 0.0125
     delta_t = 50
     end_size = Ne * math.exp(-growth_rate * delta_t)
     initial_config = msprime.PopulationConfiguration(
         sample_size=2, initial_size=Ne, growth_rate=growth_rate)
     reverse_growth = msprime.PopulationParametersChange(
         time=delta_t, growth_rate=-growth_rate)
     stop_growth = msprime.PopulationParametersChange(
         time=2 * delta_t, growth_rate=0)
     sim = msprime.simulator_factory(
         Ne=Ne,
         population_configurations=[initial_config],
         demographic_events=[reverse_growth, stop_growth])
     ll_sim = sim.create_ll_instance()

     def first_population():
         # Rebuild every Population from the low-level configuration of
         # the current time slice and return the first one.
         rebuilt = []
         for params in ll_sim.get_population_configuration():
             rebuilt.append(msprime.Population(Ne=Ne, **params))
         return rebuilt[0]

     # First time slice: ends at delta_t once rescaled by 4 * Ne.
     slice_end = ll_sim.debug_demography()
     elapsed = delta_t
     self.assertEqual(elapsed, slice_end * 4 * Ne)
     current = first_population()
     self.assertEqual(current.growth_rate, growth_rate)
     self.assertEqual(current.initial_size, Ne)
     self.assertEqual(current.get_size(delta_t), end_size)

     # Second time slice: the growth rate is reversed and the size
     # returns to Ne after a further delta_t.
     elapsed = elapsed + delta_t
     slice_end = ll_sim.debug_demography()
     self.assertEqual(elapsed, slice_end * 4 * Ne)
     current = first_population()
     self.assertEqual(current.growth_rate, -growth_rate)
     self.assertEqual(current.initial_size, end_size)
     self.assertEqual(current.get_size(delta_t), Ne)

     # Final time slice: unbounded, constant size Ne.
     slice_end = ll_sim.debug_demography()
     self.assertTrue(math.isinf(slice_end))
     current = first_population()
     self.assertEqual(current.growth_rate, 0)
     self.assertEqual(current.initial_size, Ne)
Exemple #4
0
def create_simulation_runner(parser, arg_list):
    """
    Parses the arguments and returns a SimulationRunner instance.

    The parsed ms-style options are translated into an msprime Demography
    (populations, migration matrix and time-ordered events) plus the
    per-locus recombination, mutation and gene conversion rates, which are
    then handed to SimulationRunner.

    :param parser: Parser object providing ``parse_args(arg_list)`` and
        ``error(message)``. ``error`` is called for every invalid input
        detected here; it is presumably expected not to return (as with
        ``argparse.ArgumentParser.error``) -- confirm for custom parsers.
    :param arg_list: The list of command line arguments to parse.
    :return: The configured SimulationRunner instance.
    """
    args = parser.parse_args(arg_list)
    if args.mutation_rate == 0 and not args.trees:
        parser.error("Need to specify at least one of --theta or --trees")
    num_loci = int(args.recombination[1])
    if args.recombination[1] != num_loci:
        parser.error("Number of loci must be integer value")
    if args.recombination[0] != 0.0 and num_loci < 2:
        parser.error("Number of loci must > 1")
    if num_loci < 1:
        # The check above only fires for a nonzero recombination rate, so
        # without this guard num_loci == 0 raises ZeroDivisionError in the
        # mutation rate computation below, and a negative value yields a
        # negative per-locus mutation rate.
        parser.error("Number of loci must be at least 1")
    r = 0.0
    # We don't scale recombination or mutation rates by the size
    # of the region.
    if num_loci > 1:
        r = args.recombination[0] / (num_loci - 1)
    mu = args.mutation_rate / num_loci

    # ms uses a ratio to define the GC rate, but if the recombination rate
    # is zero we define the gc rate directly.
    gc_param, gc_tract_length = args.gene_conversion
    gc_rate = 0
    if r == 0.0:
        if num_loci > 1:
            gc_rate = gc_param / (num_loci - 1)
    else:
        gc_rate = r * gc_param

    # Defaults for the single-population case; replaced below when a
    # --structure/-I option is given.
    demography = msprime.Demography.isolated_model([1])
    # Check the structure format.
    symmetric_migration_rate = 0.0
    num_populations = 1
    migration_matrix = [[0.0]]
    num_samples = [args.sample_size]
    if args.structure is not None:
        # Layout: num_populations, one sample size per population, then an
        # optional overall migration rate.
        num_populations = convert_int(args.structure[0], parser)
        # We must have at least num_population sample_configurations
        if len(args.structure) < num_populations + 1:
            parser.error("Must have num_populations sample sizes")
        demography = msprime.Demography.isolated_model([1] * num_populations)
        num_samples = [0] * num_populations
        for j in range(num_populations):
            num_samples[j] = convert_int(args.structure[j + 1], parser)
        if sum(num_samples) != args.sample_size:
            parser.error("Population sample sizes must sum to sample_size")
        # We optionally have the overall migration_rate here
        if len(args.structure) == num_populations + 2:
            symmetric_migration_rate = convert_float(
                args.structure[num_populations + 1], parser
            )
            check_migration_rate(parser, symmetric_migration_rate)
        elif len(args.structure) > num_populations + 2:
            parser.error("Too many arguments to --structure/-I")
        if num_populations > 1:
            # Spread the overall rate evenly over the off-diagonal entries
            # of each row; the diagonal stays at zero.
            migration_matrix = [
                [
                    symmetric_migration_rate / (num_populations - 1) * int(j != k)
                    for j in range(num_populations)
                ]
                for k in range(num_populations)
            ]
    else:
        if len(args.migration_matrix_entry) > 0:
            parser.error(
                "Cannot specify migration matrix entries without "
                "first providing a -I option"
            )
        if args.migration_matrix is not None:
            parser.error(
                "Cannot specify a migration matrix without "
                "first providing a -I option"
            )
    # An explicit matrix replaces the symmetric one; individual entries are
    # then applied on top of whichever matrix is in effect.
    if args.migration_matrix is not None:
        migration_matrix = convert_migration_matrix(
            parser, args.migration_matrix, num_populations
        )
    for matrix_entry in args.migration_matrix_entry:
        pop_i = convert_population_id(parser, matrix_entry[0], num_populations)
        pop_j = convert_population_id(parser, matrix_entry[1], num_populations)
        rate = matrix_entry[2]
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        check_migration_rate(parser, rate)
        migration_matrix[pop_i][pop_j] = rate

    # Set the initial demography
    if args.growth_rate is not None:
        for population in demography.populations:
            population.growth_rate = args.growth_rate
    # Per-population settings are applied after the global growth rate and
    # therefore override it.
    for population_id, growth_rate in args.population_growth_rate:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].growth_rate = growth_rate
    for population_id, size in args.population_size:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].initial_size = size

    # Events are collected as (index, event) pairs; the index is used below
    # to check that events were supplied in non-decreasing time order.
    demographic_events = []
    # First we look at population split events. We do this differently
    # to ms, as msprime requires a fixed number of population. Therefore,
    # modify the number of populations to take into account populations
    # splits. This is a messy hack, and will probably need to be changed.
    for index, (t, population_id, proportion) in args.admixture:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        if proportion < 0 or proportion > 1:
            parser.error("Proportion value must be 0 <= p <= 1.")
        # In ms, the probability of staying in source is p and the probabilty
        # of moving to the new population is 1 - p.
        event = (index, msprime.MassMigration(t, pid, num_populations, 1 - proportion))
        demographic_events.append(event)

        num_populations += 1
        # We add another element to each row in the migration matrix
        # along with an other row. All new entries are zero.
        for row in migration_matrix:
            row.append(0)
        migration_matrix.append([0 for j in range(num_populations)])
        demography.populations.append(msprime.Population(initial_size=1))
        num_samples.append(0)

    # Add the demographic events
    for index, (t, alpha) in args.growth_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eG")
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, growth_rate=alpha))
        )
    for index, (t, population_id, alpha) in args.population_growth_rate_change:
        pid = convert_population_id(parser, population_id, num_populations)
        check_event_time(parser, t)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, growth_rate=alpha, population_id=pid
                ),
            )
        )
    for index, (t, x) in args.size_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eN")
        check_event_time(parser, t)
        # A size change also resets the growth rate to zero.
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0
                ),
            )
        )
    for index, (t, population_id, x) in args.population_size_change:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0, population_id=pid
                ),
            )
        )
    for index, (t, pop_i, pop_j) in args.population_split:
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, pop_i, num_populations)
        pop_j = convert_population_id(parser, pop_j, num_populations)
        # Move every lineage of pop_i into pop_j.
        demographic_events.append((index, msprime.MassMigration(t, pop_i, pop_j, 1.0)))
        # Migration rates from subpopulation i (M[k, i], k != i) are set to zero.
        for k in range(num_populations):
            if k != pop_i:
                event = msprime.MigrationRateChange(t, 0.0, matrix_index=(k, pop_i))
                demographic_events.append((index, event))

    # Demographic events that affect the migration matrix
    if num_populations == 1:
        condition = (
            len(args.migration_rate_change) > 0
            or len(args.migration_matrix_entry_change) > 0
            or len(args.migration_matrix_change) > 0
        )
        if condition:
            parser.error("Cannot change migration rates for 1 population")
    for index, (t, x) in args.migration_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eM")
        check_migration_rate(parser, x)
        check_event_time(parser, t)
        # Same per-entry scaling as the initial symmetric migration matrix.
        event = msprime.MigrationRateChange(t, x / (num_populations - 1))
        demographic_events.append((index, event))
    for index, event in args.migration_matrix_entry_change:
        t = event[0]
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, event[1], num_populations)
        pop_j = convert_population_id(parser, event[2], num_populations)
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        rate = event[3]
        check_migration_rate(parser, rate)
        msp_event = msprime.MigrationRateChange(t, rate, matrix_index=(pop_i, pop_j))
        demographic_events.append((index, msp_event))
    for index, event in args.migration_matrix_change:
        # Layout: time, num_populations, then the matrix entries.
        if len(event) < 3:
            parser.error("Need at least three arguments to -ma")
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-ema")
        t = convert_float(event[0], parser)
        check_event_time(parser, t)
        if convert_int(event[1], parser) != num_populations:
            parser.error("num_populations must be equal for new migration matrix")
        matrix = convert_migration_matrix(parser, event[2:], num_populations)
        # Emit one rate change per off-diagonal entry of the new matrix.
        for j in range(num_populations):
            for k in range(num_populations):
                if j != k:
                    msp_event = msprime.MigrationRateChange(
                        t, matrix[j][k], matrix_index=(j, k)
                    )
                    demographic_events.append((index, msp_event))

    # Order the events by their index (ties broken by time). If that order
    # is not also sorted by time, the events were not given in
    # non-decreasing time order.
    demographic_events.sort(key=lambda x: (x[0], x[1].time))
    time_sorted = sorted(demographic_events, key=lambda x: x[1].time)
    if demographic_events != time_sorted:
        parser.error("Demographic events must be supplied in non-decreasing time order")

    demography.events = [event for _, event in demographic_events]
    demography.migration_matrix = migration_matrix

    # Adjust the population sizes so that the timescales agree. In principle
    # we could correct this with a ploidy value=0.5, but what we have here
    # seems less awful.
    for msp_event in demography.events:
        if isinstance(msp_event, msprime.PopulationParametersChange):
            if msp_event.initial_size is not None:
                msp_event.initial_size /= 2
    for j, pop in enumerate(demography.populations):
        pop.initial_size /= 2
        pop.name = f"pop_{j}"

    runner = SimulationRunner(
        num_samples,
        demography,
        num_loci=num_loci,
        num_replicates=args.num_replicates,
        recombination_rate=r,
        mutation_rate=mu,
        gene_conversion_rate=gc_rate,
        gene_conversion_tract_length=gc_tract_length,
        precision=args.precision,
        print_trees=args.trees,
        ms_random_seeds=args.random_seeds,
        hotspots=args.hotspots,
    )
    return runner