def test_4_species_run(self):
    """
    Simulate under a parsed four-species tree and check which population
    each sample node ends up in, for positional and keyword sampling.
    """
    newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
    spec = msprime.parse_species_tree(
        newick, branch_length_units="myr", Ne=10000, generation_time=20
    )

    # Take one sample from each population: sample k must be in population k.
    ts = msprime.simulate(samples=spec.sample(1, 1, 1, 1), demography=spec)
    assert ts.num_trees == 1
    assert ts.num_samples == 4
    assert ts.num_populations == 4
    for index, node_id in enumerate(ts.samples()):
        assert ts.node(node_id).population == index

    # Use the population names to get the samples. The expectations below
    # place human samples in population 0 and gorilla samples in population 2,
    # and the second case checks that the order of keywords is respected.
    keyword_cases = [
        ({"human": 4, "gorilla": 2}, [0, 0, 0, 0, 2, 2]),
        ({"gorilla": 2, "human": 4}, [2, 2, 0, 0, 0, 0]),
    ]
    for sample_counts, expected_populations in keyword_cases:
        ts = msprime.simulate(
            samples=spec.sample(**sample_counts), demography=spec
        )
        assert ts.num_trees == 1
        assert ts.num_samples == 6
        for node_id, expected in zip(ts.samples(), expected_populations):
            assert ts.node(node_id).population == expected
def verify(
    self, tree, newick=None, Ne=1, branch_length_units="gen", generation_time=None
):
    """
    Parse a species tree and check the result against the structure of ``tree``.

    :param tree: Reference tree supplying the expected topology and node
        times (presumably a tskit tree; only its traversal/query methods
        are used here).
    :param newick: Newick string to parse. Defaults to ``tree.newick()``.
    :param Ne: Forwarded to the parser; every population's ``initial_size``
        must equal it.
    :param branch_length_units: Forwarded to the parser unchanged.
    :param generation_time: Forwarded to the parser unchanged.
    """
    if newick is None:
        newick = tree.newick()
    population_configurations, demographic_events = msprime.parse_species_tree(
        newick,
        Ne=Ne,
        branch_length_units=branch_length_units,
        generation_time=generation_time,
    )
    self.assertEqual(len(population_configurations), tree.num_samples())
    for pop_config in population_configurations:
        self.assertEqual(pop_config.initial_size, Ne)
        self.assertEqual(pop_config.growth_rate, 0)
        self.assertIn("species_name", pop_config.metadata)

    # Population IDs are mapped to leaves as they are encountered in a postorder
    # traversal. An internal node takes the ID of its leftmost child, which is
    # the population the other children are expected to merge into.
    pop_id_map = {}
    k = 0
    for u in tree.nodes(order="postorder"):
        if tree.is_leaf(u):
            pop_id_map[u] = k
            k += 1
        else:
            pop_id_map[u] = pop_id_map[tree.left_child(u)]

    for u in tree.leaves():
        pop_config = population_configurations[pop_id_map[u]]
        self.assertEqual(pop_config.growth_rate, 0)
        # Note: we're assuming the default newick here in tskit that labels
        # nodes as their id + 1.
        self.assertEqual(pop_config.metadata["species_name"], f"{u + 1}")

    # We should have demographic events for every non-unary internal node, and
    # events should be output in increasing time order.
    j = 0
    # Iterate the traversal directly; copying it into a list first is unneeded.
    for node in tree.nodes(order="timeasc"):
        children = tree.children(node)
        if len(children) > 1:
            self.assertEqual(node, tree.mrca(children[0], children[1]))
            dest = pop_id_map[node]
            # One event per child other than the first, each moving that
            # child's population into the parent's.
            for child in children[1:]:
                event = demographic_events[j]
                j += 1
                self.assertIsInstance(event, msprime.MassMigration)
                self.assertAlmostEqual(event.time, tree.time(node))
                source = pop_id_map[child]
                self.assertEqual(event.source, source)
                self.assertEqual(event.dest, dest)

    # Every event must have been consumed: no extra events are allowed.
    self.assertEqual(j, len(demographic_events))
def verify(
    self, tree, newick=None, Ne=1, branch_length_units="gen", generation_time=None
):
    """
    Parse a species tree and check the result against the structure of ``tree``.

    :param tree: Reference tree supplying the expected topology and node
        times (presumably a tskit tree; only its traversal/query methods
        are used here).
    :param newick: Newick string to parse. Defaults to ``tree.newick()``.
    :param Ne: Forwarded to the parser; every population's ``initial_size``
        must equal it.
    :param branch_length_units: Forwarded to the parser unchanged.
    :param generation_time: Forwarded to the parser unchanged.
    """
    if newick is None:
        newick = tree.newick()
    spec = msprime.parse_species_tree(
        newick,
        Ne=Ne,
        branch_length_units=branch_length_units,
        generation_time=generation_time,
    )
    assert spec.num_populations == tree.num_samples()
    for pop in spec.populations:
        assert pop.initial_size == Ne
        assert pop.growth_rate == 0
        assert pop.name is not None

    # Population IDs are mapped to leaves as they are encountered in a postorder
    # traversal. An internal node takes the ID of its leftmost child, which is
    # the population the other children are expected to merge into.
    pop_id_map = {}
    k = 0
    for u in tree.nodes(order="postorder"):
        if tree.is_leaf(u):
            pop_id_map[u] = k
            k += 1
        else:
            pop_id_map[u] = pop_id_map[tree.left_child(u)]

    for u in tree.leaves():
        pop = spec.populations[pop_id_map[u]]
        assert pop.growth_rate == 0
        # Note: we're assuming the default newick here in tskit that labels
        # nodes as their id + 1.
        assert pop.name == f"{u + 1}"

    # We should have demographic events for every non-unary internal node, and
    # events should be output in increasing time order.
    j = 0
    # Iterate the traversal directly; copying it into a list first is unneeded.
    for node in tree.nodes(order="timeasc"):
        children = tree.children(node)
        if len(children) > 1:
            assert node == tree.mrca(children[0], children[1])
            dest = pop_id_map[node]
            # One event per child other than the first, each moving that
            # child's population into the parent's.
            for child in children[1:]:
                event = spec.events[j]
                j += 1
                assert isinstance(event, msprime.MassMigration)
                self.assertAlmostEqual(event.time, tree.time(node))
                source = pop_id_map[child]
                assert event.source == source
                assert event.dest == dest

    # Every event must have been consumed: no extra events are allowed.
    assert j == len(spec.events)
def test_bad_parameter(self):
    """
    Check that invalid parameter values are rejected: one argument at a
    time is replaced by a bad value while the others stay valid.
    """
    newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
    valid_kwargs = {
        "branch_length_units": "myr",
        "Ne": 10000,
        "generation_time": 5,
    }

    def assert_value_error(**overrides):
        # Call the parser with the valid arguments, except for the overrides.
        with self.assertRaises(ValueError):
            msprime.parse_species_tree(newick, **{**valid_kwargs, **overrides})

    for units in [-3, "asdf", ["myr"]]:
        assert_value_error(branch_length_units=units)

    # A second positional argument is expected to fail with TypeError.
    with self.assertRaises(TypeError):
        msprime.parse_species_tree(newick, None)

    for ne in [-3, "x"]:
        assert_value_error(Ne=ne)

    for gen_time in [None, -3, "x"]:
        assert_value_error(generation_time=gen_time)

    # "gen" units are rejected here, where a generation_time is also passed.
    for units in ["gen"]:
        assert_value_error(branch_length_units=units)
def test_bad_tree(self):
    """Each malformed tree string below must be rejected with ValueError."""
    malformed_newicks = [
        # Empty input, bare terminators and text with no tree structure.
        "",
        ";",
        "abcd",
        ";;;",
        "___",
        "∞",
        # Strings consisting only of parentheses (and whitespace).
        "(",
        ")",
        "()",
        "( )",
        "(()())",
        # Trees with a corrupted or unmatched bracket.
        "((3:0.39,5:0.39]:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        "((3:0.39,5:0.39(:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        "((3:0.39,5:0.39,:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        "(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
    ]
    for newick in malformed_newicks:
        with self.assertRaises(ValueError):
            msprime.parse_species_tree(tree=newick, Ne=1)
def test_bad_params(self):
    """Calling the parser without both ``tree`` and ``Ne`` raises TypeError."""
    incomplete_argument_sets = (
        {},  # nothing supplied
        {"tree": "()"},  # Ne missing
        {"Ne": 1},  # tree missing
    )
    for kwargs in incomplete_argument_sets:
        with pytest.raises(TypeError):
            msprime.parse_species_tree(**kwargs)
def test_4_species_parse(self):
    """
    Parse a four-species tree and check the container types and sizes of
    the result: 4 populations and 3 mass-migration events.
    """
    spec = msprime.parse_species_tree(
        "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)",
        branch_length_units="myr",
        Ne=10000,
        generation_time=20,
    )

    assert isinstance(spec.populations, list)
    assert len(spec.populations) == 4
    assert all(
        isinstance(population, msprime.demography.Population)
        for population in spec.populations
    )

    assert isinstance(spec.events, list)
    assert len(spec.events) == 3
    assert all(
        isinstance(event, msprime.demography.MassMigration)
        for event in spec.events
    )
def test_4_species(self):
    """
    Parse a four-species tree and check the returned pair: a list of 4
    population configurations followed by a list of 3 mass migrations.
    """
    parsed = msprime.parse_species_tree(
        "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)",
        branch_length_units="myr",
        Ne=10000,
        generation_time=20,
    )
    self.assertEqual(len(parsed), 2)

    # (expected element type, expected length) for each member of the pair.
    expectations = (
        (msprime.simulations.PopulationConfiguration, 4),
        (msprime.simulations.MassMigration, 3),
    )
    for items, (item_type, expected_length) in zip(parsed, expectations):
        self.assertIsInstance(items, list)
        self.assertEqual(len(items), expected_length)
        for item in items:
            self.assertIsInstance(item, item_type)