def test_too_many_sweeps(self):
    """Chain 1000 short sweeps, each followed by an argument-less model change.

    Every tree of the simulated tree sequence must have exactly one root.
    """
    # What happens when we have loads of sweeps
    events = []
    for _ in range(1000):
        sweep = msprime.SweepGenicSelection(
            position=0.5,
            start_frequency=0.69,
            end_frequency=0.7,
            alpha=1000,
            dt=0.0125,
        )
        events.extend(
            [
                # Start the sweep after 0.1 generations of Hudson
                msprime.SimulationModelChange(time=lambda t: t + 0.1, model=sweep),
                # Revert back to Hudson until the next sweep
                msprime.SimulationModelChange(),
            ]
        )
    ts = msprime.simulate(
        10,
        Ne=0.25,
        length=10,
        recombination_rate=0.2,
        demographic_events=events,
        random_seed=2,
    )
    self.assertTrue(all(tree.num_roots == 1 for tree in ts.trees()))
def test_one_event(self):
    """Parse model lists made of "dtwf" plus a single model-change entry.

    Tuple entries (with a name, ``None`` or a model instance) and a
    ``SimulationModelChange`` instance must all parse to the same event, and
    an event instance passed in must come back as a distinct object.
    """
    hudson_change = msprime.SimulationModelChange(
        time=1.33, model=msprime.StandardCoalescent()
    )
    tuple_specs = [
        (1.33, "hudson"),
        (1.33, None),
        (1.33, msprime.StandardCoalescent()),
    ]
    for spec in tuple_specs:
        model, events = ancestry._parse_model_arg(["dtwf", spec])
        assert model == msprime.DiscreteTimeWrightFisher()
        assert events == [hudson_change]

    model, events = ancestry._parse_model_arg(["dtwf", hudson_change])
    assert model == msprime.DiscreteTimeWrightFisher()
    assert events == [hudson_change]
    # We should take a copy of the event.
    assert events[0] is not hudson_change

    model, events = ancestry._parse_model_arg(["dtwf", (None, None)])
    assert model == msprime.DiscreteTimeWrightFisher()
    untimed_change = msprime.SimulationModelChange(
        time=None, model=msprime.StandardCoalescent()
    )
    assert events == [untimed_change]
def test_new_old_style_model_changes_equal(self):
    """Model changes given via ``demographic_events`` and via ``model`` agree."""
    sweeps = []
    for j in range(1, 10):
        sweeps.append(
            msprime.SweepGenicSelection(
                position=j,
                start_frequency=j,
                end_frequency=j,
                alpha=j,
                dt=j,
            )
        )

    def untimed_changes():
        # Fresh event objects for each simulator, as in separate comprehensions.
        return [msprime.SimulationModelChange(None, sweep) for sweep in sweeps]

    # Old style
    old_sim = msprime.simulator_factory(
        sample_size=2, Ne=10, demographic_events=untimed_changes()
    )
    self.assertEqual(len(old_sim.model_change_events), len(sweeps))
    for event, sweep in zip(old_sim.model_change_events, sweeps):
        self.assertEqual(event.model, sweep)

    # New style: the changes follow a leading None in the model list.
    new_sim = msprime.simulator_factory(
        sample_size=2, Ne=10, model=[None, *untimed_changes()]
    )
    self.assertEqual(old_sim.model_change_events, new_sim.model_change_events)
def test_many_sweeps(self):
    """Run ten sweep models back to back, then an argument-less model change.

    Every tree of the simulated tree sequence must have exactly one root.
    """
    sweeps = [
        msprime.SweepGenicSelection(
            reference_size=0.25,
            position=position,
            start_frequency=0.69,
            end_frequency=0.7,
            alpha=1e-5,
            dt=0.1,
        )
        for position in range(10)
    ]
    # The first sweep is scheduled at time 0.01, then every sweep (the first
    # one included) follows with no explicit time.
    events = [msprime.SimulationModelChange(0.01, sweeps[0])]
    events.extend(msprime.SimulationModelChange(None, sweep) for sweep in sweeps)
    events.append(msprime.SimulationModelChange())
    ts = msprime.simulate(
        10,
        Ne=0.25,
        length=10,
        recombination_rate=0.2,
        demographic_events=events,
        random_seed=2,
    )
    self.assertTrue(all(tree.num_roots == 1 for tree in ts.trees()))
def test_model_change_old_style(self):
    """Old ``demographic_events`` and new ``model``-list notations must match."""
    main_model = msprime.SmcApproxCoalescent()

    def check_changes(sim):
        changes = sim.model_change_events
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0].time, 1)
        # When model=None we change to the standard coalescent
        self.assertEqual(changes[1].time, 2)
        self.assertEqual(changes[1].model.name, "hudson")

    old_style = msprime.simulator_factory(
        Ne=100,
        sample_size=2,
        model=main_model,
        demographic_events=[
            msprime.SimulationModelChange(1, msprime.DiscreteTimeWrightFisher()),
            msprime.SimulationModelChange(2, None),
        ],
    )
    check_changes(old_style)

    # This should be the same in new notation
    new_style = msprime.simulator_factory(
        Ne=100,
        sample_size=2,
        model=[main_model, (1, "dtwf"), (2, None)],
    )
    check_changes(new_style)
def test_wf_hudson_back_and_forth(self):
    """Switch DTWF -> Hudson at ``t1`` and back to DTWF at ``t2``.

    Node times produced while DTWF is active (before ``t1`` and after
    ``t2``) are expected to be whole numbers; node times produced in the
    Hudson window ``(t1, t2)`` are expected not to be.
    """
    Ne = 100
    t1 = 100
    t2 = 200
    ts = msprime.simulate(
        sample_size=10,
        model=msprime.DiscreteTimeWrightFisher(Ne),
        recombination_rate=0.1,
        demographic_events=[
            msprime.SimulationModelChange(t1, msprime.StandardCoalescent(Ne)),
            msprime.SimulationModelChange(t2, msprime.DiscreteTimeWrightFisher(Ne)),
        ],
        random_seed=2,
    )
    tree = ts.first()
    self.assertEqual(tree.num_roots, 1)
    times = ts.tables.nodes.time
    # np.logical_and is a binary ufunc: a third positional argument is the
    # ``out`` array, so passing ``times > t2`` there silently dropped that
    # condition. Select both DTWF phases explicitly instead.
    in_dtwf_phase = ((times > 0) & (times < t1)) | (times > t2)
    dtwf_times = times[in_dtwf_phase]
    self.assertGreater(dtwf_times.shape[0], 0)
    self.assertTrue(np.all(dtwf_times == np.floor(dtwf_times)))
    coalescent_times = times[np.logical_and(times > t1, times < t2)]
    self.assertGreater(coalescent_times.shape[0], 0)
    self.assertTrue(np.all(coalescent_times != np.floor(coalescent_times)))
def test_one_event(self):
    """Parse model lists made of "dtwf" plus a single model-change entry.

    Tuple entries (with a name, ``None`` or a model instance) and a
    ``SimulationModelChange`` instance must all parse to the same event, and
    an event instance passed in must come back as a distinct object.
    """
    hudson_change = msprime.SimulationModelChange(
        time=1.33, model=msprime.StandardCoalescent()
    )
    tuple_specs = [
        (1.33, "hudson"),
        (1.33, None),
        (1.33, msprime.StandardCoalescent()),
    ]
    for spec in tuple_specs:
        model, events = msprime.parse_model_arg(["dtwf", spec])
        self.assertEqual(model, msprime.DiscreteTimeWrightFisher())
        self.assertEqual(events, [hudson_change])

    model, events = msprime.parse_model_arg(["dtwf", hudson_change])
    self.assertEqual(model, msprime.DiscreteTimeWrightFisher())
    self.assertEqual(events, [hudson_change])
    # We should take a copy of the event.
    self.assertIsNot(events[0], hudson_change)

    model, events = msprime.parse_model_arg(["dtwf", (None, None)])
    self.assertEqual(model, msprime.DiscreteTimeWrightFisher())
    untimed_change = msprime.SimulationModelChange(
        time=None, model=msprime.StandardCoalescent()
    )
    self.assertEqual(events, [untimed_change])
def test_two_events(self):
    """Parse model lists made of "dtwf" followed by two model changes.

    Each case pairs a model list with the indexes of the parsed events that
    must be distinct objects from the instances that were passed in.
    """
    expected = [
        msprime.SimulationModelChange(time=1, model=msprime.StandardCoalescent()),
        msprime.SimulationModelChange(time=2, model=msprime.SmcApproxCoalescent()),
    ]
    cases = [
        (["dtwf", (1, "hudson"), (2, "smc")], ()),
        (["dtwf", (1, None), (2, msprime.SmcApproxCoalescent())], ()),
        (["dtwf", expected[0], (2, msprime.SmcApproxCoalescent())], ()),
        (["dtwf", expected[0], (2, msprime.SmcApproxCoalescent())], (0,)),
        (["dtwf"] + expected, (0, 1)),
    ]
    for model_arg, copied_indexes in cases:
        model, events = msprime.parse_model_arg(model_arg)
        self.assertEqual(model, msprime.DiscreteTimeWrightFisher())
        self.assertEqual(events, expected)
        for index in copied_indexes:
            self.assertIsNot(events[index], expected[index])
def test_wf_hudson_different_specifications(self):
    """Compare three ways of asking for DTWF followed by Hudson at time ``t``."""
    Ne = 100
    t = 100
    new_api_ts = msprime.sim_ancestry(
        samples=5,
        population_size=Ne,
        model=[msprime.DiscreteTimeWrightFisher(duration=t), "hudson"],
        recombination_rate=0.1,
        sequence_length=1,
        discrete_genome=False,
        random_seed=2,
    )

    def simulate_legacy(model_change):
        # The two legacy runs differ only in the model-change event.
        return msprime.simulate(
            sample_size=10,
            recombination_rate=0.1,
            Ne=Ne,
            model="dtwf",
            demographic_events=[model_change],
            random_seed=2,
        )

    named_ts = simulate_legacy(msprime.SimulationModelChange(t, "hudson"))
    unnamed_ts = simulate_legacy(msprime.SimulationModelChange(t))
    # Not worth trying to puzzle out the slight differences in tables
    # between the old and new form. The edges are the same, good enough.
    assert new_api_ts.tables.edges == named_ts.tables.edges
    assert named_ts.equals(unnamed_ts, ignore_provenance=True)
def test_wf_hudson_different_specifications(self):
    """Compare three ways of asking for DTWF followed by Hudson at time ``t``.

    With provenance rows cleared, the tables of all three runs must be equal.
    """
    Ne = 100
    t = 100

    def tables_without_provenance(ts):
        tables = ts.dump_tables()
        tables.provenances.clear()
        return tables

    model_list_ts = msprime.simulate(
        sample_size=10,
        Ne=Ne,
        model=["dtwf", (t, "hudson")],
        recombination_rate=0.1,
        random_seed=2,
    )
    named_event_ts = msprime.simulate(
        sample_size=10,
        recombination_rate=0.1,
        Ne=Ne,
        model="dtwf",
        demographic_events=[msprime.SimulationModelChange(t, "hudson")],
        random_seed=2,
    )
    unnamed_event_ts = msprime.simulate(
        sample_size=10,
        recombination_rate=0.1,
        Ne=Ne,
        model="dtwf",
        demographic_events=[msprime.SimulationModelChange(t)],
        random_seed=2,
    )
    reference = tables_without_provenance(model_list_ts)
    self.assertEqual(reference, tables_without_provenance(named_event_ts))
    self.assertEqual(reference, tables_without_provenance(unnamed_event_ts))
def test_two_events(self):
    """Parse model lists made of "dtwf" followed by two model changes.

    Each case pairs a model list with the indexes of the parsed events that
    must be distinct objects from the instances that were passed in.
    """
    expected = [
        msprime.SimulationModelChange(time=1, model=msprime.StandardCoalescent()),
        msprime.SimulationModelChange(time=2, model=msprime.SmcApproxCoalescent()),
    ]
    cases = [
        (["dtwf", (1, "hudson"), (2, "smc")], ()),
        (["dtwf", (1, None), (2, msprime.SmcApproxCoalescent())], ()),
        (["dtwf", expected[0], (2, msprime.SmcApproxCoalescent())], ()),
        (["dtwf", expected[0], (2, msprime.SmcApproxCoalescent())], (0,)),
        (["dtwf"] + expected, (0, 1)),
    ]
    for model_arg, copied_indexes in cases:
        model, events = ancestry._parse_model_arg(model_arg)
        assert model == msprime.DiscreteTimeWrightFisher()
        assert events == expected
        for index in copied_indexes:
            assert events[index] is not expected[index]
def test_models_out_of_order(self):
    """Model changes at time 10 and then 8 must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        # Built inside the context so the expectation covers the same
        # statements as before.
        decreasing_changes = [
            msprime.SimulationModelChange(10, "hudson"),
            msprime.SimulationModelChange(8, "hudson"),
        ]
        msprime.simulate(
            Ne=10, sample_size=10, demographic_events=decreasing_changes
        )
def test_simulation_model_change(self):
    """Feed several ``SimulationModelChange`` examples to the repr round-trip check."""
    explicit_models = [
        "hudson",
        msprime.DiscreteTimeWrightFisher(),
        msprime.BetaCoalescent(alpha=1, truncation_point=2),
    ]
    # One event built with no arguments, then one per explicit model.
    examples = [msprime.SimulationModelChange()]
    examples += [
        msprime.SimulationModelChange(model=model) for model in explicit_models
    ]
    self.assert_repr_round_trip(examples)
def test_encode_simulation_models(self):
    """Check how the ``model`` argument appears in the decoded provenance.

    A list of plain names/pairs must come back unchanged; a list of model
    and model-change instances must come back as ``__class__``-tagged dicts.
    """
    simple_model = ["hudson", [10, "dtwf"], [20, "smc"], [None, None]]
    ts = msprime.simulate(10, model=simple_model)
    parameters = self.decode(ts.provenance(0).record).parameters
    self.assertEqual(parameters.sample_size, 10)
    self.assertEqual(list(parameters.model), simple_model)

    model_instances = [
        msprime.StandardCoalescent(),
        msprime.SimulationModelChange(10, msprime.DiscreteTimeWrightFisher()),
        msprime.SimulationModelChange(20, msprime.SmcApproxCoalescent()),
        msprime.SimulationModelChange(30, msprime.BetaCoalescent(alpha=1.1)),
    ]
    ts = msprime.simulate(10, model=model_instances)
    parameters = self.decode(ts.provenance(0).record).parameters
    self.assertEqual(parameters.sample_size, 10)
    self.assertEqual(
        parameters.model[0], {"__class__": "msprime.ancestry.StandardCoalescent"}
    )

    change_class = "msprime.ancestry.SimulationModelChange"
    # Expected encodings of entries 1..3 of the model list, in order.
    expected_changes = [
        {
            "__class__": change_class,
            "model": {"__class__": "msprime.ancestry.DiscreteTimeWrightFisher"},
            "time": 10,
        },
        {
            "__class__": change_class,
            "model": {"__class__": "msprime.ancestry.SmcApproxCoalescent"},
            "time": 20,
        },
        {
            "__class__": change_class,
            "model": {
                "__class__": "msprime.ancestry.BetaCoalescent",
                "alpha": 1.1,
                "truncation_point": 1.0,
            },
            "time": 30,
        },
    ]
    for index, expected in enumerate(expected_changes, start=1):
        self.assertDictEqual(parameters.model[index], expected)
def test_model_change_time_bad_func(self):
    """A time callable returning ``t - 1`` must make simulate raise ``ValueError``."""

    def one_earlier(t):
        return t - 1

    with self.assertRaises(ValueError):
        changes = [
            msprime.SimulationModelChange(1, "hudson"),
            msprime.SimulationModelChange(one_earlier, "hudson"),
        ]
        msprime.simulate(Ne=10, sample_size=10, demographic_events=changes)
def test_models_out_of_order(self):
    """Model changes at time 10 and then 8 must raise the durations error."""
    with pytest.raises(ValueError, match="durations must be >= 0"):
        decreasing_changes = [
            msprime.SimulationModelChange(10, "hudson"),
            msprime.SimulationModelChange(8, "hudson"),
        ]
        msprime.simulate(
            Ne=10**6,
            sample_size=10,
            model="hudson",
            demographic_events=decreasing_changes,
            random_seed=2,
        )
def test_models_out_of_order(self):
    """A model list with changes at time 10 and then 8 must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        out_of_order_models = ["hudson"]
        out_of_order_models.append(msprime.SimulationModelChange(10, "hudson"))
        out_of_order_models.append(msprime.SimulationModelChange(8, "hudson"))
        msprime.simulate(
            Ne=10**6,
            sample_size=10,
            model=out_of_order_models,
            random_seed=2,
        )
def test_model_change_time_bad_func(self):
    """A time callable returning ``t - 1`` must make simulate raise ``ValueError``."""

    def one_earlier(t):
        return t - 1

    with pytest.raises(ValueError):
        models = [
            None,
            msprime.SimulationModelChange(1, "hudson"),
            msprime.SimulationModelChange(one_earlier, "hudson"),
        ]
        msprime.simulate(Ne=10, sample_size=10, model=models)
def test_sweep_model_change_time_complete(self):
    """A sweep followed by an untimed change to Hudson must fully coalesce.

    The untimed change is given both as ``None`` and as a callable returning
    ``None``; both runs must yield equal tables once provenance is cleared.
    Without the Hudson change at least one tree must keep several roots.
    """
    # Short sweep that doesn't coalesce followed
    # by Hudson phase to finish up coalescent
    sweep_model = msprime.SweepGenicSelection(
        reference_size=0.25,
        position=0.5,
        start_frequency=0.69,
        end_frequency=0.7,
        alpha=1e-5,
        dt=1,
    )

    def run_after_sweep(*later_changes):
        # All runs share everything except what follows the sweep at time 0.
        return msprime.simulate(
            10,
            Ne=0.25,
            recombination_rate=2,
            demographic_events=[
                msprime.SimulationModelChange(0, sweep_model),
                *later_changes,
            ],
            random_seed=2,
        )

    ts = run_after_sweep(msprime.SimulationModelChange(None, "hudson"))
    self.assertTrue(all(tree.num_roots == 1 for tree in ts.trees()))

    # Returning None from a function should be identical
    ts2 = run_after_sweep(msprime.SimulationModelChange(lambda t: None, "hudson"))
    tables_none = ts.dump_tables()
    tables_func = ts2.dump_tables()
    for tables in (tables_none, tables_func):
        tables.provenances.clear()
    self.assertEqual(tables_none, tables_func)

    # Make sure that the Hudson phase did something.
    ts = run_after_sweep()
    self.assertTrue(any(tree.num_roots > 1 for tree in ts.trees()))
def test_simulation_models(self):
    """Run ``verify`` on simulations whose ``model`` argument is a list.

    Covers a list of plain names/pairs and a list of model and
    ``SimulationModelChange`` instances.
    """
    by_name = ["hudson", [10, "dtwf"], [20, "smc"]]
    self.verify(msprime.simulate(10, model=by_name))

    timed_models = [
        (10, msprime.DiscreteTimeWrightFisher()),
        (20, msprime.SmcApproxCoalescent()),
        (30, msprime.BetaCoalescent(alpha=1.1)),
    ]
    by_instance = [msprime.StandardCoalescent()]
    by_instance.extend(
        msprime.SimulationModelChange(time, model) for time, model in timed_models
    )
    self.verify(msprime.simulate(10, model=by_instance))
def test_ped_wf_recombination(self):
    """Simulate through a two-generation pedigree, then switch to "dtwf".

    The first tree must have one root, node times within the pedigree span
    must be whole numbers, and some nodes must be older than the pedigree.
    """
    individuals = np.array([1, 2, 3, 4, 5, 6])
    # One [parent, parent] row per individual; -1 marks a missing parent.
    parents = np.array([[4, 5], [4, 5], [4, 5], [4, 5], [-1, -1], [-1, -1]])
    ind_times = np.array([0, 0, 0, 0, 1, 1])
    sample_flags = np.array([1, 1, 1, 1, 0, 0])
    ped_end = max(ind_times)
    ped = pedigrees.Pedigree(
        individuals, parents, ind_times, sample_flags, sex=None, ploidy=2
    )
    ts = msprime.simulate(
        sample_size=4,
        pedigree=ped,
        recombination_rate=0.1,
        model=msprime.WrightFisherPedigree(),
        demographic_events=[msprime.SimulationModelChange(time=1, model="dtwf")],
    )
    assert ts.first().num_roots == 1

    node_times = ts.tables.nodes.time
    within_pedigree = (node_times > 0) & (node_times <= ped_end)
    ped_times = node_times[within_pedigree]
    assert ped_times.shape[0] > 0
    assert np.all(ped_times == np.floor(ped_times))
    wf_times = node_times[node_times > ped_end]
    assert wf_times.shape[0] > 0
def test_pedigree_unsupported_events(self):
    """Events during a "wf_ped" simulation must raise ``NotImplementedError``.

    Checked for a model change at time 0.5 and for a population parameter
    change at the pedigree's largest time.
    """
    individuals = np.array([1, 2, 3, 4, 5, 6])
    # One [parent, parent] row per individual; -1 marks a missing parent.
    parents = np.array([[4, 5], [4, 5], [4, 5], [4, 5], [-1, -1], [-1, -1]])
    ind_times = np.array([0, 0, 0, 0, 1, 1])
    sample_flags = np.array([1, 1, 1, 1, 0, 0])
    ped_end = max(ind_times)
    ped = msprime.Pedigree(
        individuals, parents, ind_times, sample_flags, sex=None, ploidy=2
    )

    bad_model_change = msprime.SimulationModelChange(
        0.5, msprime.DiscreteTimeWrightFisher()
    )
    with self.assertRaises(NotImplementedError):
        msprime.simulate(
            4,
            pedigree=ped,
            demographic_events=[bad_model_change],
            model="wf_ped",
        )

    bad_demographic_event = msprime.PopulationParametersChange(
        ped_end, initial_size=2
    )
    with self.assertRaises(NotImplementedError):
        msprime.simulate(
            4,
            pedigree=ped,
            demographic_events=[bad_demographic_event],
            model="wf_ped",
        )
def test_model_change_no_model_inherits_Ne(self):
    """A change with ``model=None`` gets a "hudson" model with the simulator's Ne.

    The explicit DTWF change keeps its own reference size of 500, while the
    ``None`` change must report the top-level value of 1500.
    """
    changes = [
        msprime.SimulationModelChange(1, msprime.DiscreteTimeWrightFisher(500)),
        msprime.SimulationModelChange(2, None),
    ]
    sim = msprime.simulator_factory(
        sample_size=2, Ne=1500, demographic_events=changes
    )
    self.assertEqual(sim.model.reference_size, 1500)
    self.assertEqual(len(sim.model_change_events), 2)

    explicit_change = sim.model_change_events[0]
    self.assertEqual(explicit_change.time, 1)
    self.assertEqual(explicit_change.model.reference_size, 500)

    implicit_change = sim.model_change_events[1]
    self.assertEqual(implicit_change.time, 2)
    self.assertEqual(implicit_change.model.reference_size, 1500)
    self.assertEqual(implicit_change.model.name, "hudson")
def test_model_change_negative_time(self):
    """A model change at time -10 must raise ``ValueError``."""
    with self.assertRaises(ValueError):
        # Built inside the context so the expectation covers the same
        # statements as before.
        negative_change = msprime.SimulationModelChange(-10, "hudson")
        msprime.simulate(
            Ne=10, sample_size=10, demographic_events=[negative_change]
        )
def test_ped_wf_recombination(self):
    """Simulate through a two-generation pedigree, then switch to DTWF.

    The first tree must have one root, node times within the pedigree span
    must be whole numbers, and some nodes must be older than the pedigree.
    """
    individuals = np.array([1, 2, 3, 4, 5, 6])
    # One [parent, parent] row per individual; -1 marks a missing parent.
    parents = np.array([[4, 5], [4, 5], [4, 5], [4, 5], [-1, -1], [-1, -1]])
    ind_times = np.array([0, 0, 0, 0, 1, 1])
    sample_flags = np.array([1, 1, 1, 1, 0, 0])
    ped_end = max(ind_times)
    ped_model = msprime.WrightFisherPedigree()
    ped = msprime.Pedigree(
        individuals, parents, ind_times, sample_flags, sex=None, ploidy=2
    )
    switch_to_dtwf = msprime.SimulationModelChange(
        1, msprime.DiscreteTimeWrightFisher(2)
    )
    ts = msprime.simulate(
        sample_size=4,
        pedigree=ped,
        recombination_rate=0.1,
        demographic_events=[switch_to_dtwf],
        model=ped_model,
    )
    self.assertEqual(ts.first().num_roots, 1)

    node_times = ts.tables.nodes.time
    within_pedigree = (node_times > 0) & (node_times <= ped_end)
    ped_times = node_times[within_pedigree]
    self.assertGreater(ped_times.shape[0], 0)
    self.assertTrue(np.all(ped_times == np.floor(ped_times)))
    wf_times = node_times[node_times > ped_end]
    self.assertGreater(wf_times.shape[0], 0)
def test_pedigree_unsupported_events(self):
    """Events during a "wf_ped" simulation must be rejected.

    A model change at time 0.5 must raise ``RuntimeError`` matching
    "not support interruption"; a population parameter change at the
    pedigree's largest time must raise ``_msprime.LibraryError``.
    """
    individuals = np.array([1, 2, 3, 4, 5, 6])
    # One [parent, parent] row per individual; -1 marks a missing parent.
    parents = np.array([[4, 5], [4, 5], [4, 5], [4, 5], [-1, -1], [-1, -1]])
    ind_times = np.array([0, 0, 0, 0, 1, 1])
    sample_flags = np.array([1, 1, 1, 1, 0, 0])
    ped_end = max(ind_times)
    ped = pedigrees.Pedigree(
        individuals, parents, ind_times, sample_flags, sex=None, ploidy=2
    )

    def simulate_with(event):
        return msprime.simulate(
            4,
            pedigree=ped,
            demographic_events=[event],
            model="wf_ped",
        )

    bad_model_change = msprime.SimulationModelChange(
        0.5, msprime.DiscreteTimeWrightFisher()
    )
    with pytest.raises(RuntimeError, match="not support interruption"):
        simulate_with(bad_model_change)

    bad_demographic_event = msprime.PopulationParametersChange(
        ped_end, initial_size=2
    )
    with pytest.raises(_msprime.LibraryError):
        simulate_with(bad_demographic_event)
def test_afs_calculation(ts_file='cached/balsac_140.tskit'):
    """Compare a hand-counted allele frequency spectrum with the tree sequence's.

    If ``ts_file`` does not exist, a pedigree simulation is run from
    'data/balsac_140.tsv' and saved to ``ts_file`` first. The tree sequence
    is then loaded from ``ts_file``, mutations are tallied by the number of
    samples below the mutation's node, and the tally is compared with
    ``ts.allele_frequency_spectrum(polarised=True, span_normalise=False)``.

    :param ts_file: Path of the cached tree sequence to load (and to create
        when it is missing).
    """
    if not os.path.exists(ts_file):
        ped = msprime.Pedigree.read_txt('data/balsac_140.tsv')
        ped.set_samples(num_samples=14)
        des = [msprime.SimulationModelChange(max(ped.time))]
        sim = msprime.simulate(
            14,
            Ne=1000,
            pedigree=ped,
            model='wf_ped',
            length=1e8,
            mutation_rate=1e-8,
            recombination_rate=1e-8,
            demographic_events=des,
        )
        # Save to the path that is loaded below; a hard-coded path here left
        # any non-default ``ts_file`` missing and made the load fail.
        sim.dump(ts_file)

    ts = msprime.load(ts_file)
    sample_nodes = ts.samples()
    # afs[k] counts the mutations that sit above exactly k samples.
    afs = np.zeros(len(sample_nodes) + 1)
    for tree in ts.trees():
        for mutation in tree.mutations():
            afs[tree.num_samples(mutation.node)] += 1

    ts_afs = ts.allele_frequency_spectrum(polarised=True, span_normalise=False)
    assert np.allclose(afs, ts_afs)
def test_model_change_negative_time(self):
    """A model list with a change at time -10 must raise ``ValueError``."""
    with pytest.raises(ValueError):
        # Built inside the context so the expectation covers the same
        # statements as before.
        negative_change = msprime.SimulationModelChange(-10, "hudson")
        msprime.simulate(
            Ne=10,
            sample_size=10,
            model=[None, negative_change],
        )
def test_sweep_model_change_time_complete(self):
    """A sweep followed by an untimed model change must fully coalesce.

    The untimed change is given both as ``(None, None)`` and as an event
    whose time is a callable returning ``None``; the two tree sequences must
    compare equal. With the sweep model alone at least one tree must keep
    several roots.
    """
    # Short sweep that doesn't coalesce followed
    # by Hudson phase to finish up coalescent
    sweep_model = msprime.SweepGenicSelection(
        position=0.5, start_frequency=0.69, end_frequency=0.7, alpha=1e5, dt=1
    )

    def run(model):
        # All three runs differ only in the ``model`` argument.
        return msprime.simulate(
            10,
            Ne=0.25,
            recombination_rate=2,
            model=model,
            random_seed=2,
        )

    ts = run([sweep_model, (None, None)])
    assert all(tree.num_roots == 1 for tree in ts.trees())

    # Returning None from a function should be identical
    ts2 = run(
        [
            sweep_model,
            msprime.SimulationModelChange(lambda t: None, "hudson"),
        ]
    )
    self.assertTreeSequencesEqual(ts, ts2)

    # Make sure that the Hudson phase did something.
    ts = run(sweep_model)
    assert any(tree.num_roots > 1 for tree in ts.trees())
def test_population_configuration(self):
    """Run ``verify`` on a two-population simulation with a model change at time 10."""
    two_pops = []
    for _ in range(2):
        two_pops.append(msprime.PopulationConfiguration(5))
    symmetric_migration = [[0, 1], [1, 0]]
    model_change = msprime.SimulationModelChange(time=10)
    ts = msprime.simulate(
        population_configurations=two_pops,
        migration_matrix=symmetric_migration,
        demographic_events=[model_change],
    )
    self.verify(ts)