def test_population_configurations(self):
    """Check that population configurations are reflected in the simulator."""
    # Values that are not a sequence of configurations must be rejected.
    for invalid in (10, ["sdf"], "sdfsd"):
        with self.assertRaises(TypeError):
            msprime.simulator_factory(population_configurations=invalid)

    # Only the basic equalities are checked here; the individual
    # configuration options are tested elsewhere.
    for num_pops in range(1, 10):
        configs = [
            msprime.PopulationConfiguration(5, initial_size=5)
            for _ in range(num_pops)
        ]
        sim = msprime.simulator_factory(population_configurations=configs)
        self.assertEqual(len(sim.demography.populations), len(configs))
        for population, config in zip(sim.demography.populations, configs):
            self.assertEqual(population.initial_size, config.initial_size)
            self.assertEqual(population.growth_rate, config.growth_rate)
        # Each configuration contributes five samples.
        self.assertEqual(len(sim.samples), 5 * num_pops)
        self.assertEqual(len(sim.population_configuration), num_pops)

    # Without explicit configurations there is a single population.
    default_sim = msprime.simulator_factory(10)
    self.assertEqual(len(default_sim.population_configuration), 1)
def test_new_old_style_model_changes_equal(self):
    """Old- and new-style model change specifications give equal events."""
    sweep_fields = ("position", "start_frequency", "end_frequency", "alpha", "dt")
    sweeps = []
    for j in range(1, 10):
        # Every sweep parameter is set to the same value j.
        sweeps.append(msprime.SweepGenicSelection(**dict.fromkeys(sweep_fields, j)))

    # Old style: model changes are passed as demographic events.
    old_style = msprime.simulator_factory(
        sample_size=2,
        Ne=10,
        demographic_events=[
            msprime.SimulationModelChange(None, sweep) for sweep in sweeps
        ],
    )
    self.assertEqual(len(old_style.model_change_events), len(sweeps))
    for change, sweep in zip(old_style.model_change_events, sweeps):
        self.assertEqual(change.model, sweep)

    # New style: model changes follow the initial model in the model list.
    new_style = msprime.simulator_factory(
        sample_size=2,
        Ne=10,
        model=[None, *(msprime.SimulationModelChange(None, sweep) for sweep in sweeps)],
    )
    self.assertEqual(old_style.model_change_events, new_style.model_change_events)
def test_model_change_old_style(self):
    """Model changes given as events or in the model list are equivalent."""

    def check_model_changes(sim):
        # Two changes are expected, at times 1 and 2.
        changes = sim.model_change_events
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0].time, 1)
        # When model=None we change to the standard coalescent
        self.assertEqual(changes[1].time, 2)
        self.assertEqual(changes[1].model.name, "hudson")

    main_model = msprime.SmcApproxCoalescent()

    # Old notation: changes are supplied as demographic events.
    check_model_changes(
        msprime.simulator_factory(
            Ne=100,
            sample_size=2,
            model=main_model,
            demographic_events=[
                msprime.SimulationModelChange(1, msprime.DiscreteTimeWrightFisher()),
                msprime.SimulationModelChange(2, None),
            ],
        )
    )

    # New notation: (time, model) pairs follow the main model.
    check_model_changes(
        msprime.simulator_factory(
            Ne=100,
            sample_size=2,
            model=[main_model, (1, "dtwf"), (2, None)],
        )
    )
def test_named_model_variants(self):
    """Model names are accepted in upper case and title case."""
    expected_classes = {
        "hudson": msprime.StandardCoalescent,
        "smc": msprime.SmcApproxCoalescent,
        "smc_prime": msprime.SmcPrimeApproxCoalescent,
    }
    for name, model_class in expected_classes.items():
        for spelling in (name.upper(), name.title()):
            sim = msprime.simulator_factory(sample_size=10, model=spelling)
            self.assertIsInstance(sim.get_model(), model_class)
def test_random_seed(self):
    """The given seed reaches the random generator; seed plus generator fails."""
    expected_seed = 12345
    sim = msprime.simulator_factory(10, random_seed=expected_seed)
    self.assertEqual(sim.random_generator.get_seed(), expected_seed)

    # Supplying both a seed and a generator is an error.
    generator = _msprime.RandomGenerator(1234)
    self.assertRaises(
        ValueError,
        msprime.simulator_factory,
        10,
        random_seed=1234,
        random_generator=generator,
    )
def test_named_model_variants(self):
    """Model names are accepted in upper case and title case."""
    expected_classes = {
        "hudson": msprime.StandardCoalescent,
        "smc": msprime.SmcApproxCoalescent,
        "smc_prime": msprime.SmcPrimeApproxCoalescent,
        "dtwf": msprime.DiscreteTimeWrightFisher,
    }
    for name, model_class in expected_classes.items():
        for spelling in (name.upper(), name.title()):
            sim = msprime.simulator_factory(sample_size=10, model=spelling)
            self.assertIsInstance(sim.model, model_class)
def test_hudson(self):
    """The default model and an explicit "hudson" model both run sensibly."""
    threshold = 20
    # First the implicit default, then the model named explicitly.
    for model_kwargs in ({}, {"model": "hudson"}):
        sim = msprime.simulator_factory(
            sample_size=10, recombination_rate=5, **model_kwargs
        )
        sim.run()
        self.assertGreater(sim.get_num_common_ancestor_events(), threshold)
        self.assertGreater(sim.get_num_recombination_events(), threshold)
        # No common ancestor events may be rejected under this model.
        self.assertEqual(sim.get_num_rejected_common_ancestor_events(), 0)
def verify_simulation(self, n, m, r):
    """
    Run one simulation with sample size n over a uniform recombination
    map built from (m, r, num_loci=m), then check its counters and output.
    """
    generator = msprime.RandomGenerator(1)
    uniform_map = msprime.RecombinationMap.uniform_map(m, r, num_loci=m)
    sim = msprime.simulator_factory(
        n, recombination_map=uniform_map, random_generator=generator
    )
    self.assertEqual(sim.random_generator, generator)
    sim.run()

    self.assertEqual(sim.num_breakpoints, len(sim.breakpoints))
    self.assertGreater(sim.time, 0)
    self.assertGreater(sim.num_avl_node_blocks, 0)
    self.assertGreater(sim.num_segment_blocks, 0)
    self.assertGreater(sim.num_node_mapping_blocks, 0)

    tree_sequence = sim.get_tree_sequence()
    # The simulation time must equal the largest node time (at least 0.0).
    latest = max([0.0] + [node.time for node in tree_sequence.nodes()])
    self.assertEqual(sim.time, latest)

    self.assertGreater(sim.num_common_ancestor_events, 0)
    self.assertGreaterEqual(sim.num_recombination_events, 0)
    self.assertGreaterEqual(sim.total_num_migration_events, 0)
    self.assertGreaterEqual(sim.num_multiple_recombination_events, 0)
    self.verify_sparse_trees(tree_sequence)
    self.verify_dump_load(tree_sequence)
def test_multimerger(self):
    """Verify a full-ARG simulation that includes an instantaneous bottleneck."""
    bottleneck = msprime.InstantaneousBottleneck(time=0.1, population=0, strength=5)
    sim = msprime.simulator_factory(
        100,
        recombination_rate=0.1,
        record_full_arg=True,
        random_generator=msprime.RandomGenerator(1234),
        demographic_events=[bottleneck],
    )
    self.verify(sim, multiple_mergers=True)
def test_single_growth_rate(self):
    """Check population state across one growth-rate change."""
    # All values are in units of generations and absolute sizes.
    Ne = 1000
    growth_rate = -0.01
    end_time = 20
    end_size = Ne * math.exp(-growth_rate * end_time)

    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=Ne, growth_rate=growth_rate
            )
        ],
        demographic_events=[
            msprime.PopulationParametersChange(time=end_time, growth_rate=0)
        ],
    )
    ll_sim = simulator.create_ll_instance()

    def current_populations():
        # Rebuild Population objects from the low-level state.
        return [
            msprime.Population(**config)
            for config in ll_sim.get_population_configuration()
        ]

    # First time slice: from the start until the growth rate changes.
    self.assertEqual(end_time, ll_sim.debug_demography())
    populations = current_populations()
    self.assertEqual(len(populations), 1)
    first = populations[0]
    self.assertEqual(first.growth_rate, growth_rate)
    self.assertEqual(first.initial_size, Ne)
    self.assertEqual(first.get_size(end_time), end_size)

    # Second time slice: constant size from the change onwards.
    self.assertTrue(math.isinf(ll_sim.debug_demography()))
    second = current_populations()[0]
    self.assertEqual(second.growth_rate, 0)
    self.assertEqual(second.initial_size, end_size)
    self.assertEqual(second.get_size(10), end_size)
def run_replicate(self, j, treefile):
    """
    Run replicate j, write its output to treefile and record statistics.

    The tree sequence is dumped when self.generate_trees is set, and
    haplotypes are written as text when self.generate_haplotypes is set.
    """
    uniform_map = msprime.RecombinationMap.uniform_map(
        self.num_loci, self.recombination_rate, self.num_loci
    )
    sim = msprime.simulator_factory(
        self.sample_size,
        recombination_map=uniform_map,
        Ne=self.effective_population_size,
    )
    sim.run()

    # Mutations are only generated when haplotypes are requested.
    mutation_rate = self.mutation_rate if self.generate_haplotypes else 0
    mutation_rng = msprime.RandomGenerator(random.randint(1, 2**31))
    tree_sequence = sim.get_tree_sequence(
        msprime.MutationGenerator(mutation_rng, mutation_rate)
    )

    if self.generate_trees:
        tree_sequence.dump(treefile)
    if self.generate_haplotypes:
        with open(treefile, "w") as out:
            for haplotype in tree_sequence.haplotypes():
                print(haplotype, file=out)

    # Per-replicate statistics, stored at index j.
    self.tree_file_size[j] = os.path.getsize(treefile)
    self.used_memory[j] = sim.used_memory
    self.num_trees[j] = sim.num_breakpoints
    # NOTE(review): unlike the statistics around it, this attribute and
    # num_records_per_tree are assigned without the [j] index - confirm
    # whether these are meant to be per-replicate.
    self.num_multiple_re_events = sim.num_multiple_recombination_events
    self.num_re_events[j] = sim.num_recombination_events
    self.num_ca_events[j] = sim.num_common_ancestor_events
    self.num_records[j] = tree_sequence.num_edges
    self.num_nodes[j] = tree_sequence.num_nodes
    self.num_records_per_tree = get_mean_records_per_tree(tree_sequence)
def test_recombination_n100(self):
    """Verify a full-ARG simulation of 100 samples with recombination."""
    sim = msprime.simulator_factory(
        100,
        recombination_rate=0.2,
        record_full_arg=True,
        random_generator=_msprime.RandomGenerator(100),
    )
    self.verify(sim)
def verify_simulation(self, n, m, r):
    """
    Run one discrete-genome simulation with sample size n over the uniform
    recombination map built from (m, r), then check its counters.
    """
    generator = _msprime.RandomGenerator(1)
    uniform_map = msprime.RecombinationMap.uniform_map(m, r)
    sim = msprime.simulator_factory(
        n,
        recombination_map=uniform_map,
        random_generator=generator,
        discrete_genome=True,
    )
    self.assertEqual(sim.random_generator, generator)
    sim.run()

    self.assertEqual(sim.num_breakpoints, len(sim.breakpoints))
    self.assertGreater(sim.time, 0)
    self.assertGreater(sim.num_avl_node_blocks, 0)
    self.assertGreater(sim.num_segment_blocks, 0)
    self.assertGreater(sim.num_node_mapping_blocks, 0)

    tree_sequence = next(sim.run_replicates(1))
    # The simulation time must equal the largest node time (at least 0.0).
    latest = max([0.0] + [node.time for node in tree_sequence.nodes()])
    self.assertEqual(sim.time, latest)

    self.assertGreater(sim.num_common_ancestor_events, 0)
    self.assertGreaterEqual(sim.num_recombination_events, 0)
    self.assertGreaterEqual(np.sum(sim.num_migration_events), 0)
    self.assertGreaterEqual(sim.num_multiple_recombination_events, 0)
def verify_simulation(self, n, m, r):
    """
    Run one simulation with sample size n over a uniform recombination
    map built from (m, r, num_loci=m), then check its counters and output.
    """
    generator = msprime.RandomGenerator(1)
    uniform_map = msprime.RecombinationMap.uniform_map(m, r, num_loci=m)
    sim = msprime.simulator_factory(
        n, recombination_map=uniform_map, random_generator=generator
    )
    self.assertEqual(sim.random_generator, generator)
    sim.run()

    self.assertEqual(sim.num_breakpoints, len(sim.breakpoints))
    # Time and the block counters must all be strictly positive after a run.
    for positive_value in (
        sim.time,
        sim.num_avl_node_blocks,
        sim.num_segment_blocks,
        sim.num_node_mapping_blocks,
    ):
        self.assertGreater(positive_value, 0)

    tree_sequence = sim.get_tree_sequence()
    # The simulation time must equal the largest node time (at least 0.0).
    latest = max([0.0] + [node.time for node in tree_sequence.nodes()])
    self.assertEqual(sim.time, latest)

    self.assertGreater(sim.num_common_ancestor_events, 0)
    self.assertGreaterEqual(sim.num_recombination_events, 0)
    self.assertGreaterEqual(sim.total_num_migration_events, 0)
    self.assertGreaterEqual(sim.num_multiple_recombination_events, 0)
    self.verify_sparse_trees(tree_sequence)
    self.verify_dump_load(tree_sequence)
def __init__(self, sample_size=1, num_loci=1,
             scaled_recombination_rate=0, num_replicates=1,
             migration_matrix=None, population_configurations=None,
             demographic_events=None, scaled_mutation_rate=0,
             print_trees=False, precision=3, random_seeds=None,
             scaled_gene_conversion_rate=0, gene_conversion_track_length=1,
             hotspots=None):
    """
    Store the run parameters and build the simulator, random generator
    and mutation generator from the given ms-style arguments.
    """
    self._sample_size = sample_size
    self._num_loci = num_loci
    self._num_replicates = num_replicates
    self._recombination_rate = scaled_recombination_rate
    self._mutation_rate = scaled_mutation_rate
    self._precision = precision
    self._print_trees = print_trees

    # For strict ms-compatibility we want m non-recombining loci, unless
    # hotspots were given, in which case the map is derived from them.
    if hotspots is not None:
        self._recomb_map = hotspots_to_recomb_map(
            hotspots, self._recombination_rate, num_loci)
    else:
        self._recomb_map = msprime.RecombinationMap.uniform_map(
            num_loci, self._recombination_rate, discrete=True)

    # When population configurations are specified, the overall sample
    # size must not be passed to the simulator as well.
    simulator_sample_size = (
        self._sample_size if population_configurations is None else None)

    # msprime measures time in generations for a specific Ne value,
    # whereas ms uses coalescent time. To be compatible with ms we
    # therefore use an Ne value of 1/4.
    self._simulator = msprime.simulator_factory(
        Ne=0.25,
        sample_size=simulator_sample_size,
        recombination_map=self._recomb_map,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        gene_conversion_rate=scaled_gene_conversion_rate,
        gene_conversion_track_length=gene_conversion_track_length)

    # Sort out the random seeds, generating them if none were supplied.
    ms_seeds = random_seeds if random_seeds is not None else generate_seeds()
    self._random_generator = msprime.RandomGenerator(get_single_seed(ms_seeds))
    self._ms_random_seeds = ms_seeds
    self._simulator.random_generator = self._random_generator
    self._mutation_generator = msprime.MutationGenerator(
        self._random_generator, self._mutation_rate)
def test_migration_matrix(self):
    """Check migration matrix validation and propagation to the low level."""
    # A migration matrix cannot be given without population configurations.
    with self.assertRaises(ValueError):
        msprime.simulator_factory(10, migration_matrix=[])

    for N in range(1, 10):
        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(N)]

        def build(matrix_arg):
            return msprime.simulator_factory(
                population_configurations=pop_configs,
                migration_matrix=matrix_arg)

        # With no matrix specified, the low-level matrix is all zeros.
        sim = msprime.simulator_factory(population_configurations=pop_configs)
        ll_sim = sim.create_ll_instance()
        np.testing.assert_array_equal(
            ll_sim.get_migration_matrix(), [0] * (N * N))

        hl_matrix = [
            [(col + row) * int(col != row) for col in range(N)]
            for row in range(N)
        ]
        sim = build(hl_matrix)
        self.assertEqual(sim.migration_matrix, hl_matrix)
        # The equivalent numpy array must give the same result.
        sim = build(np.array(hl_matrix))
        self.assertEqual(sim.migration_matrix, hl_matrix)
        ll_sim = sim.create_ll_instance()
        flattened = [value for row in hl_matrix for value in row]
        np.testing.assert_array_equal(ll_sim.get_migration_matrix(), flattened)

        for bad_type in (234, 1.2):
            with self.assertRaises(TypeError):
                build(bad_type)
        # Iterables of the wrong shape raise a ValueError instead.
        for bad_type in ({}, ""):
            with self.assertRaises(ValueError):
                build(bad_type)

        # Now check the structure of the matrix.
        hl_matrix[0][0] = "bad value"
        sim = build(hl_matrix)
        with self.assertRaises(TypeError):
            sim.create_ll_instance()
        hl_matrix[0] = None
        with self.assertRaises(TypeError):
            build(hl_matrix)
        hl_matrix[0] = []
        with self.assertRaises(ValueError):
            build(hl_matrix)

        # A simple numpy array: ones everywhere except the diagonal.
        hl_matrix = np.ones((N, N))
        np.fill_diagonal(hl_matrix, 0)
        sim = build(hl_matrix)
        np.testing.assert_array_equal(np.array(sim.migration_matrix), hl_matrix)
        sim.run()
        events = np.array(sim.num_migration_events)
        self.assertEqual(events.shape, (N, N))
        self.assertTrue(np.all(events >= 0))
def test_hudson(self):
    """The default model and an explicit "hudson" model give equal counts."""
    threshold = 20

    def run_with_seed(**model_kwargs):
        # Both runs use a generator seeded identically with 2.
        simulator = msprime.simulator_factory(
            sample_size=10, recombination_rate=10, **model_kwargs)
        simulator.random_generator = msprime.RandomGenerator(2)
        simulator.run()
        return simulator

    sim = run_with_seed()
    self.assertGreater(sim.num_common_ancestor_events, threshold)
    self.assertGreater(sim.num_recombination_events, threshold)
    self.assertEqual(sim.num_rejected_common_ancestor_events, 0)

    sim2 = run_with_seed(model="hudson")
    self.assertEqual(
        sim2.num_common_ancestor_events, sim.num_common_ancestor_events)
    self.assertEqual(
        sim2.num_recombination_events, sim.num_recombination_events)
    self.assertEqual(sim2.num_rejected_common_ancestor_events, 0)
def test_smc_variants(self):
    """Both SMC models run and reject some common ancestor events."""
    threshold = 20
    for model_name in ("smc", "smc_prime"):
        sim = msprime.simulator_factory(
            sample_size=10, recombination_rate=5, model=model_name)
        sim.run()
        self.assertGreater(sim.get_num_common_ancestor_events(), threshold)
        self.assertGreater(sim.get_num_recombination_events(), threshold)
        # Unlike the Hudson model, rejections are expected here.
        self.assertGreater(sim.get_num_rejected_common_ancestor_events(), 0)
def test_no_recombination(self):
    """Without recombination, simplifying the full ARG changes nothing."""
    sim = msprime.simulator_factory(
        10,
        random_generator=msprime.RandomGenerator(1),
        record_full_arg=True,
    )
    ts = self.verify(sim)
    original_tables = ts.tables
    simplified_tables = ts.simplify().tables
    self.assertEqual(original_tables.nodes, simplified_tables.nodes)
    self.assertEqual(original_tables.edges, simplified_tables.edges)
def test_specify_model_and_Ne(self):
    """The model's own reference size wins over any Ne argument."""
    # When both the model reference size and Ne are specified, Ne is
    # ignored, even when it is not a valid value.
    for ignored_Ne in (0, 1234, None, "sdf"):
        model = msprime.SmcPrimeApproxCoalescent(20)
        sim = msprime.simulator_factory(sample_size=2, Ne=ignored_Ne, model=model)
        self.assertEqual(sim.model.reference_size, 20)
def test_event_chunk(self):
    """Non-positive event chunks are rejected; very large ones are accepted."""
    sim = msprime.simulator_factory(10)
    for invalid_chunk in (-(2**32), -1, 0):
        self.assertRaises(ValueError, sim.run, event_chunk=invalid_chunk)
    # Chunk sizes beyond 32 and 64 bits must still work.
    for large_chunk in (2**32 + 1, 2**64 + 1):
        sim.reset()
        sim.run(event_chunk=large_chunk)
def test_debug_logging_dtwf(self):
    """A DTWF run at DEBUG level logs INFO lines around DEBUG progress lines."""
    sim = msprime.simulator_factory(3, Ne=10, model="dtwf")
    with self.assertLogs("msprime.ancestry", logging.DEBUG) as captured:
        sim.run(event_chunk=1)
    lines = captured.output
    self.assertGreaterEqual(len(lines), 3)
    # The first and last records are INFO; the second is a DEBUG time record.
    first, second, last = lines[0], lines[1], lines[-1]
    self.assertTrue(first.startswith("INFO"))
    self.assertTrue(last.startswith("INFO"))
    self.assertTrue(second.startswith("DEBUG:msprime.ancestry:time="))
def test_effective_population_size(self):
    """Ne must be positive and is stored as the model's population size."""
    for invalid_Ne in (-1, -1e16, 0):
        with self.assertRaises(ValueError):
            msprime.simulator_factory(10, Ne=invalid_Ne)
    for valid_Ne in (1, 10, 1e5):
        sim = msprime.simulator_factory(10, Ne=valid_Ne)
        self.assertEqual(sim.model.population_size, valid_Ne)
    # Test the default. Nothing is asserted here: this only checks that
    # construction without Ne succeeds.
    sim = msprime.simulator_factory(10)
def test_population_configurations(self):
    """Check that population configurations reach the low-level simulator."""
    # Values that are not a sequence of configurations must be rejected.
    for invalid in (10, ["sdf"], "sdfsd"):
        with self.assertRaises(TypeError):
            msprime.simulator_factory(population_configurations=invalid)

    # Only the basic equalities are checked here; the individual
    # configuration options are tested elsewhere.
    for num_pops in range(1, 10):
        configs = [msprime.PopulationConfiguration(5) for _ in range(num_pops)]
        sim = msprime.simulator_factory(population_configurations=configs)
        self.assertEqual(sim.population_configurations, configs)
        # Each configuration contributes five samples.
        self.assertEqual(len(sim.samples), 5 * num_pops)
        ll_sim = sim.create_ll_instance()
        self.assertEqual(len(ll_sim.get_population_configuration()), num_pops)

    # Without explicit configurations there is a single population.
    ll_sim = msprime.simulator_factory(10).create_ll_instance()
    self.assertEqual(len(ll_sim.get_population_configuration()), 1)
def test_debug_func(self):
    """The debug function passed to run() is called at least once."""
    calls = 0

    def count_call(simulator):
        nonlocal calls
        calls += 1

    sim = msprime.simulator_factory(10)
    sim.run(event_chunk=1, debug_func=count_call)
    self.assertGreater(calls, 0)
def test_symmetric_growth_rates(self):
    """
    Check a symmetric model: the growth rate is negated after delta_t and
    set to zero after 2 * delta_t, so the size returns to its start value.
    """
    Ne = 10001
    growth_rate = 0.0125
    delta_t = 50
    end_size = Ne * math.exp(-growth_rate * delta_t)

    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=Ne, growth_rate=growth_rate)
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=delta_t, growth_rate=-growth_rate),
            msprime.PopulationParametersChange(
                time=2 * delta_t, growth_rate=0),
        ],
    )
    ll_sim = simulator.create_ll_instance()

    def first_population():
        # Rebuild Population objects from the low-level state, keep the first.
        rebuilt = [
            msprime.Population(Ne=Ne, **config)
            for config in ll_sim.get_population_configuration()
        ]
        return rebuilt[0]

    # First time slice, ending at delta_t.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(delta_t, ll_end_time * 4 * Ne)
    pop = first_population()
    self.assertEqual(pop.growth_rate, growth_rate)
    self.assertEqual(pop.initial_size, Ne)
    self.assertEqual(pop.get_size(delta_t), end_size)

    # Second time slice, ending at 2 * delta_t.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(2 * delta_t, ll_end_time * 4 * Ne)
    pop = first_population()
    self.assertEqual(pop.growth_rate, -growth_rate)
    self.assertEqual(pop.initial_size, end_size)
    self.assertEqual(pop.get_size(delta_t), Ne)

    # Final time slice, which never ends.
    ll_end_time = ll_sim.debug_demography()
    self.assertTrue(math.isinf(ll_end_time))
    pop = first_population()
    self.assertEqual(pop.growth_rate, 0)
    self.assertEqual(pop.initial_size, Ne)
def test_sample_size(self):
    """A sample size of at least two is required and is reflected in samples."""
    # A missing sample size, or a sample size of one, is an error.
    with self.assertRaises(ValueError):
        msprime.simulator_factory()
    with self.assertRaises(ValueError):
        msprime.simulator_factory(1)
    with self.assertRaises(ValueError):
        msprime.simulator_factory(sample_size=1)

    for expected in (2, 100, 1000):
        sim = msprime.simulator_factory(expected)
        self.assertEqual(sim.num_samples, expected)
        self.assertEqual(len(sim.samples), expected)
        # The first two fields of every sample are zero.
        for sample in sim.samples:
            self.assertEqual(sample[0], 0)
            self.assertEqual(sample[1], 0)
def test_migration_matrix(self):
    """The migration matrix is stored on the simulator unchanged."""
    expected = [
        [0, 1, 2],
        [3, 0, 4],
        [5, 6, 0],
    ]
    # One single-sample population per matrix row.
    configs = [msprime.PopulationConfiguration(1) for _ in range(3)]
    sim = msprime.simulator_factory(
        population_configurations=configs, migration_matrix=expected)
    self.assertEqual(sim.migration_matrix, expected)
def test_info_logging(self):
    """A default run logs exactly two INFO records: start and completion."""
    sim = msprime.simulator_factory(10)
    with self.assertLogs("msprime.ancestry", logging.INFO) as captured:
        sim.run()
    lines = captured.output
    self.assertEqual(len(lines), 2)
    start_line, end_line = lines
    expected_start = (
        "INFO:msprime.ancestry:Running model {'name': 'hudson'} "
        "until max time: inf"
    )
    self.assertEqual(start_line, expected_start)
    self.assertTrue(end_line.startswith("INFO:msprime.ancestry:Completed at time"))
def test_migration_matrix(self):
    """Check migration matrix validation and storage on the demography."""
    # A migration matrix cannot be given without population configurations.
    with self.assertRaises(ValueError):
        msprime.simulator_factory(10, migration_matrix=[])

    for N in range(1, 10):
        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(N)]

        def build(candidate):
            return msprime.simulator_factory(
                population_configurations=pop_configs,
                migration_matrix=candidate)

        # With no matrix specified, the matrix is zero everywhere.
        sim = msprime.simulator_factory(population_configurations=pop_configs)
        np.testing.assert_array_equal(sim.migration_matrix, np.zeros((N, N)))

        matrix = [
            [(col + row) * int(col != row) for col in range(N)]
            for row in range(N)
        ]
        sim = build(matrix)
        np.testing.assert_array_equal(sim.demography.migration_matrix, matrix)
        # The equivalent numpy array must give the same result.
        sim = build(np.array(matrix))
        np.testing.assert_array_equal(sim.demography.migration_matrix, matrix)
        np.testing.assert_array_equal(sim.migration_matrix, matrix)

        for bad_type in ({}, "", 234, 1.2):
            with self.assertRaises(ValueError):
                build(bad_type)

        # Now check the structure of the matrix.
        matrix[0][0] = "bad value"
        with self.assertRaises(ValueError):
            build(matrix)
        # Warnings raised while handling the malformed rows are silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for malformed_row in (None, []):
                matrix[0] = malformed_row
                with self.assertRaises(ValueError):
                    build(matrix)

        # A simple numpy array: ones everywhere except the diagonal.
        matrix = np.ones((N, N))
        np.fill_diagonal(matrix, 0)
        sim = build(matrix)
        np.testing.assert_array_equal(
            np.array(sim.demography.migration_matrix), matrix)
        sim.run()
        events = np.array(sim.num_migration_events)
        self.assertEqual(events.shape, (N, N))
        self.assertTrue(np.all(events >= 0))
def test_migration_matrix(self):
    """Check migration matrix validation and propagation to the low level."""
    # A migration matrix cannot be given without population configurations.
    with self.assertRaises(ValueError):
        msprime.simulator_factory(10, migration_matrix=[])

    for N in range(1, 10):
        pop_configs = [msprime.PopulationConfiguration(5) for _ in range(N)]

        def build(matrix_arg):
            return msprime.simulator_factory(
                population_configurations=pop_configs,
                migration_matrix=matrix_arg)

        # With no matrix specified, the low-level matrix is all zeros.
        sim = msprime.simulator_factory(population_configurations=pop_configs)
        ll_sim = sim.create_ll_instance()
        self.assertEqual(ll_sim.get_migration_matrix(), [0] * (N * N))

        hl_matrix = [
            [(col + row) * int(col != row) for col in range(N)]
            for row in range(N)
        ]
        sim = build(hl_matrix)
        self.assertEqual(sim.migration_matrix, hl_matrix)
        # The equivalent numpy array must give the same result.
        sim = build(np.array(hl_matrix))
        self.assertEqual(sim.migration_matrix, hl_matrix)
        ll_sim = sim.create_ll_instance()
        flattened = [value for row in hl_matrix for value in row]
        self.assertEqual(ll_sim.get_migration_matrix(), flattened)

        for bad_type in (234, 1.2):
            with self.assertRaises(TypeError):
                build(bad_type)
        # Iterables of the wrong shape raise a ValueError instead.
        for bad_type in ({}, ""):
            with self.assertRaises(ValueError):
                build(bad_type)

        # Now check the structure of the matrix.
        hl_matrix[0][0] = "bad value"
        sim = build(hl_matrix)
        with self.assertRaises(TypeError):
            sim.create_ll_instance()
        hl_matrix[0] = None
        with self.assertRaises(TypeError):
            build(hl_matrix)
        hl_matrix[0] = []
        with self.assertRaises(ValueError):
            build(hl_matrix)

        # A simple numpy array: ones everywhere except the diagonal.
        hl_matrix = np.ones((N, N))
        np.fill_diagonal(hl_matrix, 0)
        sim = build(hl_matrix)
        self.assertTrue(np.array_equal(np.array(sim.migration_matrix), hl_matrix))
        sim.run()
        events = np.array(sim.num_migration_events)
        self.assertEqual(events.shape, (N, N))
        self.assertTrue(np.all(events >= 0))
def test_perf_parameters(self):
    """Block sizes are positive after a run and can be overridden."""
    block_size_names = (
        "avl_node_block_size",
        "segment_block_size",
        "node_mapping_block_size",
    )
    sim = msprime.simulator_factory(10)
    sim.run()
    for name in block_size_names:
        self.assertGreater(getattr(sim, name), 0)

    # After a reset every block size can be set to 1 and read back.
    sim.reset()
    for name in block_size_names:
        setattr(sim, name, 1)
    for name in block_size_names:
        self.assertEqual(getattr(sim, name), 1)
def test_recombination_map(self):
    """Old-form and new-form recombination maps both round-trip via the sim."""
    with self.assertRaises(TypeError):
        msprime.simulator_factory(10, recombination_map="wrong type")

    def check_round_trip(input_map, positions, rates):
        # The simulator's stored map must rebuild into an equal RateMap.
        sim = msprime.simulator_factory(10, recombination_map=input_map)
        rebuilt = msprime.RateMap(**sim.recombination_map)
        self.assertEqual(list(rebuilt.position), positions)
        self.assertEqual(list(rebuilt.rate), rates)
        self.assertEqual(sim.sequence_length, rebuilt.sequence_length)

    for n in range(2, 10):
        positions = list(range(n))
        rates = [0.1 * j for j in range(n - 1)]
        # The old-form RecombinationMap takes a trailing zero rate.
        check_round_trip(
            msprime.RecombinationMap(positions, rates + [0.0]), positions, rates)
        # The new-form RateMap takes one rate per interval.
        check_round_trip(msprime.RateMap(positions, rates), positions, rates)
def test_smc_variants(self):
    """Both SMC models run and reject some common ancestor events."""
    threshold = 20
    for model_name in ("smc", "smc_prime"):
        sim = msprime.simulator_factory(
            sample_size=10,
            recombination_rate=5,
            model=model_name,
            random_generator=_msprime.RandomGenerator(3),
        )
        sim.run()
        # Unlike the Hudson model, rejections are expected here.
        self.assertGreater(sim.num_rejected_common_ancestor_events, 0)
        self.assertGreater(sim.num_common_ancestor_events, threshold)
        self.assertGreater(sim.num_recombination_events, threshold)
def test_symmetric_growth_rates(self):
    """
    Check a symmetric model: the growth rate is negated after delta_t and
    set to zero after 2 * delta_t, so the size returns to its start value.
    """
    Ne = 10001
    growth_rate = 0.0125
    delta_t = 50
    end_size = Ne * math.exp(-growth_rate * delta_t)

    initial_config = msprime.PopulationConfiguration(
        sample_size=2, initial_size=Ne, growth_rate=growth_rate)
    rate_changes = [
        msprime.PopulationParametersChange(
            time=delta_t, growth_rate=-growth_rate),
        msprime.PopulationParametersChange(
            time=2 * delta_t, growth_rate=0),
    ]
    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=[initial_config],
        demographic_events=rate_changes)
    ll_sim = simulator.create_ll_instance()

    def first_population():
        # Rebuild Population objects from the low-level state, keep the first.
        rebuilt = [
            msprime.Population(Ne=Ne, **config)
            for config in ll_sim.get_population_configuration()]
        return rebuilt[0]

    # First time slice, ending at delta_t.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(delta_t, ll_end_time * 4 * Ne)
    pop = first_population()
    self.assertEqual(pop.growth_rate, growth_rate)
    self.assertEqual(pop.initial_size, Ne)
    self.assertEqual(pop.get_size(delta_t), end_size)

    # Second time slice, ending at 2 * delta_t.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(2 * delta_t, ll_end_time * 4 * Ne)
    pop = first_population()
    self.assertEqual(pop.growth_rate, -growth_rate)
    self.assertEqual(pop.initial_size, end_size)
    self.assertEqual(pop.get_size(delta_t), Ne)

    # Final time slice, which never ends.
    ll_end_time = ll_sim.debug_demography()
    self.assertTrue(math.isinf(ll_end_time))
    pop = first_population()
    self.assertEqual(pop.growth_rate, 0)
    self.assertEqual(pop.initial_size, Ne)
def test_sample_size_population_configuration(self):
    """Samples are derived from per-population sample sizes."""
    for num_pops in range(1, 5):
        # A total sample size of zero is always an error.
        empty_configs = [
            msprime.PopulationConfiguration(0) for _ in range(num_pops)]
        with self.assertRaises(ValueError):
            msprime.simulator_factory(population_configurations=empty_configs)

        configs = [msprime.PopulationConfiguration(2) for _ in range(num_pops)]
        sim = msprime.simulator_factory(population_configurations=configs)
        self.assertEqual(len(sim.samples), 2 * num_pops)
        # Two samples at time zero per population, in population order.
        expected = [
            msprime.Sample(population=pop_id, time=0)
            for pop_id in range(num_pops)
            for _ in range(2)
        ]
        self.assertEqual(sim.samples, expected)
        ll_sim = sim.create_ll_instance()
        self.assertEqual(ll_sim.get_samples(), expected)
def test_recombination_map(self):
    """The recombination map is stored as given and sets the locus count."""
    with self.assertRaises(TypeError):
        msprime.simulator_factory(10, recombination_map="wrong type")

    for n in range(2, 10):
        positions = list(range(n))
        # One rate per interval, followed by a trailing zero.
        rates = [0.1 * j for j in range(n - 1)]
        rates.append(0.0)
        recomb_map = msprime.RecombinationMap(positions, rates)
        sim = msprime.simulator_factory(10, recombination_map=recomb_map)
        self.assertEqual(sim.recombination_map, recomb_map)
        self.assertEqual(recomb_map.get_positions(), positions)
        self.assertEqual(recomb_map.get_rates(), rates)
        self.assertEqual(sim.num_loci, recomb_map.get_num_loci())
        ll_sim = sim.create_ll_instance()
        self.assertEqual(ll_sim.get_num_loci(), recomb_map.get_num_loci())
def test_model_instances(self):
    """Model instances are stored as given; non-model values are rejected."""
    for invalid_model in (1234, {}):
        with self.assertRaises(TypeError):
            msprime.simulator_factory(sample_size=2, model=invalid_model)

    model_classes = (
        msprime.StandardCoalescent,
        msprime.SmcApproxCoalescent,
        msprime.SmcPrimeApproxCoalescent,
        msprime.DiscreteTimeWrightFisher,
        msprime.BetaCoalescent,
        msprime.DiracCoalescent,
    )
    # All instances are created up front, each with default arguments.
    instances = [model_class() for model_class in model_classes]
    for instance in instances:
        sim = msprime.simulator_factory(sample_size=10, model=instance)
        self.assertEqual(sim.model, instance)
def test_samples(self):
    """Explicit samples are stored as given and reach the low-level simulator."""
    pop_configs = [msprime.PopulationConfiguration() for _ in range(3)]
    # Sample k sits in population k and is taken at time k.
    samples = [msprime.Sample(population=k, time=k) for k in range(3)]
    # Ne = 1/4 keeps times in coalescent units.
    sim = msprime.simulator_factory(
        Ne=1/4, samples=samples, population_configurations=pop_configs)
    self.assertEqual(sim.samples, samples)
    ll_sim = sim.create_ll_instance()
    self.assertEqual(ll_sim.get_samples(), samples)
def test_sample_size(self):
    """A sample size of at least two is required and reaches the low level."""
    # A missing sample size, or a sample size of one, is an error.
    with self.assertRaises(ValueError):
        msprime.simulator_factory()
    with self.assertRaises(ValueError):
        msprime.simulator_factory(1)
    with self.assertRaises(ValueError):
        msprime.simulator_factory(sample_size=1)

    for expected in (2, 100, 1000):
        sim = msprime.simulator_factory(expected)
        self.assertEqual(len(sim.samples), expected)
        ll_sim = sim.create_ll_instance()
        self.assertEqual(ll_sim.get_num_samples(), expected)
        ll_samples = ll_sim.get_samples()
        self.assertEqual(len(ll_samples), expected)
        # The first two fields of every low-level sample are zero.
        for sample in ll_samples:
            self.assertEqual(sample[0], 0)
            self.assertEqual(sample[1], 0)
def test_migration_matrix(self):
    """The scaled migration matrix is the input matrix multiplied by 4 * Ne."""
    rates = [
        [0, 1, 2],
        [3, 0, 4],
        [5, 6, 0],
    ]
    for Ne in (1, 10, 1e6):
        # One single-sample population per matrix row.
        configs = [msprime.PopulationConfiguration(1) for _ in range(3)]
        sim = msprime.simulator_factory(
            Ne=Ne, population_configurations=configs, migration_matrix=rates)
        observed = sim.get_scaled_migration_matrix()
        expected = [[rate * 4 * Ne for rate in row] for row in rates]
        self.assertEqual(observed, expected)
def test_recombination_rate_scaling(self):
    """The per-locus rate is the total rate spread over num_loci - 1 gaps."""
    # Each case is (rate, Ne, length).
    cases = (
        (10, 0.1, 0.1),
        (0.1, 1, 10),
        (1e-8, 10**4, 10**8),
        (1e-8, 10**5, 10**9),
    )
    for rate, Ne, length in cases:
        sim = msprime.simulator_factory(
            10, Ne=Ne, recombination_rate=rate, length=length)
        num_loci = msprime.RecombinationMap.DEFAULT_NUM_LOCI
        expected_rate = (length * rate) / (num_loci - 1)
        # All of these rates are expected to be positive.
        self.assertGreater(expected_rate, 0)
        ll_sim = sim.create_ll_instance()
        self.assertAlmostEqual(expected_rate, ll_sim.get_recombination_rate())
        self.assertAlmostEqual(
            sim.recombination_map.get_per_locus_recombination_rate(),
            expected_rate)
def __init__(
        self, sample_size=1, num_loci=1, scaled_recombination_rate=0,
        num_replicates=1, migration_matrix=None,
        population_configurations=None, demographic_events=None,
        scaled_mutation_rate=0, print_trees=False, precision=3,
        random_seeds=None):
    """
    Store the run parameters and build the simulator, random generator
    and mutation generator used by this runner.

    The scaled recombination and mutation rates are stored unchanged.
    When ``random_seeds`` is None, seeds are obtained from
    ``generate_seeds()``; either way they are reduced to a single seed
    with ``get_single_seed()``.
    """
    # Plain parameter storage.
    self._sample_size = sample_size
    self._num_loci = num_loci
    self._num_replicates = num_replicates
    self._recombination_rate = scaled_recombination_rate
    self._mutation_rate = scaled_mutation_rate
    self._precision = precision
    self._print_trees = print_trees

    # For strict ms-compatibility we want to have m non-recombining loci.
    uniform_recomb_map = msprime.RecombinationMap.uniform_map(
        num_loci, self._recombination_rate, num_loci)

    # If any population_configurations were specified we must not also
    # give the overall sample size.
    if population_configurations is None:
        factory_sample_size = self._sample_size
    else:
        factory_sample_size = None

    # msprime measures time in units of generations, given a specific
    # Ne value, whereas ms uses coalescent time. To be compatible with
    # ms, we therefore need to use an Ne value of 1/4.
    self._simulator = msprime.simulator_factory(
        Ne=0.25,
        sample_size=factory_sample_size,
        recombination_map=uniform_recomb_map,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    # Sort out the random seeds: fall back to generated seeds when the
    # caller supplied none, then collapse them into one seed.
    ms_seeds = generate_seeds() if random_seeds is None else random_seeds
    single_seed = get_single_seed(ms_seeds)
    self._random_generator = msprime.RandomGenerator(single_seed)
    self._ms_random_seeds = ms_seeds
    self._simulator.random_generator = self._random_generator
    self._mutation_generator = msprime.MutationGenerator(
        self._random_generator, self._mutation_rate)
def __init__(
        self, sample_size=1, num_loci=1, scaled_recombination_rate=0,
        num_replicates=1, migration_matrix=None,
        population_configurations=None, demographic_events=None,
        scaled_mutation_rate=0, print_trees=False, precision=3,
        random_seeds=None):
    """
    Store the run parameters and build the simulator and random
    generator used by this runner.

    The scaled recombination and mutation rates are divided by 4 before
    being stored. When ``random_seeds`` is None, seeds are obtained from
    ``generate_seeds()``; either way they are reduced to a single seed
    with ``get_single_seed()``.
    """
    self._sample_size = sample_size
    self._num_loci = num_loci
    self._num_replicates = num_replicates
    self._precision = precision
    self._print_trees = print_trees

    # We use unscaled per-generation rates. By setting Ne = 1 we
    # don't need to rescale, but we still need to divide by 4 to
    # cancel the factor introduced when calculating the scaled rates.
    # NOTE(review): no Ne is passed to simulator_factory below, so this
    # relies on its default -- confirm that default is 1.
    self._recombination_rate = scaled_recombination_rate / 4
    self._mutation_rate = scaled_mutation_rate / 4

    # For strict ms-compatibility we want to have m non-recombining loci.
    uniform_recomb_map = msprime.RecombinationMap.uniform_map(
        num_loci, self._recombination_rate, num_loci)

    # If any population_configurations were specified we must not also
    # give the overall sample size.
    if population_configurations is None:
        factory_sample_size = self._sample_size
    else:
        factory_sample_size = None

    self._simulator = msprime.simulator_factory(
        sample_size=factory_sample_size,
        recombination_map=uniform_recomb_map,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    # Sort out the random seeds: fall back to generated seeds when the
    # caller supplied none, then collapse them into one seed.
    ms_seeds = generate_seeds() if random_seeds is None else random_seeds
    single_seed = get_single_seed(ms_seeds)
    self._random_generator = msprime.RandomGenerator(single_seed)
    self._ms_random_seeds = ms_seeds
    self._simulator.set_random_generator(self._random_generator)
def test_single_growth_rate_size_change(self):
    # A single population grows exponentially until end_time, at which
    # point a parameter change fixes it at new_size with zero growth.
    # Set out our values in units of generations and absolute sizes.
    Ne = 1000
    growth_rate = -0.01
    end_time = 20
    end_size = Ne * math.exp(-growth_rate * end_time)
    new_size = 4 * Ne
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=Ne, growth_rate=growth_rate)]
    demographic_events = [
        msprime.PopulationParametersChange(
            time=end_time, initial_size=new_size, growth_rate=0)]
    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=population_configurations,
        demographic_events=demographic_events)
    ll_sim = simulator.create_ll_instance()

    def current_populations():
        # Wrap the low-level population state for the current epoch.
        return [
            msprime.Population(Ne=Ne, **config)
            for config in ll_sim.get_population_configuration()]

    # First time slice: runs up to the parameter change event.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(end_time, ll_end_time * 4 * Ne)
    first_epoch = current_populations()
    self.assertEqual(len(first_epoch), 1)
    growing_pop = first_epoch[0]
    self.assertEqual(growing_pop.growth_rate, growth_rate)
    self.assertEqual(growing_pop.initial_size, Ne)
    self.assertEqual(growing_pop.get_size(end_time), end_size)

    # Now fast forward to the next time slice.
    ll_end_time = ll_sim.debug_demography()
    self.assertTrue(math.isinf(ll_end_time))
    constant_pop = current_populations()[0]
    self.assertEqual(constant_pop.growth_rate, 0)
    self.assertEqual(constant_pop.initial_size, new_size)
    self.assertEqual(constant_pop.get_size(10), new_size)
def f(Ne):
    # Build a simulator with sample size 10 and the given Ne.
    simulator = msprime.simulator_factory(10, Ne=Ne)
    return simulator
def f(configs):
    # Build a simulator from the given population configurations only.
    simulator = msprime.simulator_factory(
        population_configurations=configs)
    return simulator
def f(hl_matrix):
    # Build a simulator with the given migration matrix; pop_configs is
    # taken from the enclosing scope.
    simulator = msprime.simulator_factory(
        population_configurations=pop_configs,
        migration_matrix=hl_matrix)
    return simulator
def test_default_migration_matrix(self):
    # With no migration matrix given, the low-level simulator must report
    # a flat matrix holding a single zero entry.
    low_level_sim = msprime.simulator_factory(10).create_ll_instance()
    expected_matrix = [0.0]
    self.assertEqual(low_level_sim.get_migration_matrix(), expected_matrix)
def f(recomb_rate):
    # Build a simulator with sample size 10 and the given recombination rate.
    simulator = msprime.simulator_factory(
        10, recombination_rate=recomb_rate)
    return simulator
def f(recomb_map):
    # Build a simulator with sample size 10 and the given recombination map.
    simulator = msprime.simulator_factory(
        10, recombination_map=recomb_map)
    return simulator
def test_length(self):
    # Zero and negative sequence lengths must be rejected with ValueError.
    invalid_lengths = [-1, 0, -1e-6]
    for bad_length in invalid_lengths:
        self.assertRaises(
            ValueError, msprime.simulator_factory, 10, length=bad_length)
def test_random_seed(self):
    # A random generator passed to the factory must be the one exposed by
    # the simulator, and it must still report the seed it was built with.
    expected_seed = 12345
    generator = msprime.RandomGenerator(expected_seed)
    sim = msprime.simulator_factory(10, random_generator=generator)
    self.assertEqual(generator, sim.random_generator)
    self.assertEqual(generator.get_seed(), expected_seed)