def verify(
    self,
    tree,
    newick=None,
    initial_size=1,
    branch_length_units="gen",
    generation_time=None,
):
    """
    Parse a species tree and check the resulting demography against ``tree``.

    :param tree: Tree the expectations are derived from. Only its
        ``num_nodes``, ``leaves``, ``nodes``, ``children``, ``time``,
        ``is_leaf`` and ``is_internal`` members are used here.
    :param newick: Newick text to parse. When None it is generated with
        ``self.make_newick(tree)``.
    :param initial_size: Size every parsed population is expected to have.
    :param branch_length_units: Units of the newick branch lengths. It is
        forwarded to the parser under its ``time_units`` keyword.
    :param generation_time: Forwarded unchanged to the parser.
    """
    if newick is None:
        newick = self.make_newick(tree)
    # The parser's keyword is ``time_units``; the ``branch_length_units``
    # name is kept in this signature only so that existing callers of
    # verify() keep working.
    demography = species_trees.parse_species_tree(
        newick,
        initial_size=initial_size,
        time_units=branch_length_units,
        generation_time=generation_time,
    )
    assert demography.num_populations == tree.num_nodes
    for pop in demography.populations:
        assert pop.initial_size == initial_size
        assert pop.growth_rate == 0
        assert pop.name is not None

    # Population IDs are mapped to leaves first, and then to the internal
    # nodes in postorder.
    pop_id_map = {u: k for k, u in enumerate(tree.leaves())}
    k = len(pop_id_map)
    for u in tree.nodes(order="postorder"):
        if tree.is_internal(u):
            pop_id_map[u] = k
            k += 1

    for u in tree.nodes():
        pop = demography.populations[pop_id_map[u]]
        assert pop.growth_rate == 0
        if tree.is_leaf(u):
            # Assuming the newick came from self.make_newick
            assert pop.name == f"node_{u}"

    # NOTE(review): whether a split event stores population names or
    # integer IDs is not visible from here, so both are normalised to the
    # integer ID before comparing.
    id_by_name = {
        pop.name: pop_id for pop_id, pop in enumerate(demography.populations)
    }

    def population_id(ref):
        return id_by_name.get(ref, ref)

    # We should have exactly one split event for every node that has
    # children, and events should be output in increasing time order.
    j = 0
    for node in tree.nodes(order="timeasc"):
        children = tree.children(node)
        if len(children) == 0:
            continue
        event = demography.events[j]
        j += 1
        assert isinstance(event, msprime.demography.PopulationSplit)
        assert event.time == pytest.approx(tree.time(node))
        assert population_id(event.ancestral) == pop_id_map[node]
        derived = sorted(population_id(ref) for ref in event.derived)
        assert derived == sorted(pop_id_map[child] for child in children)
    assert j == len(demography.events)
def test_bad_parameter(self):
    """Check that invalid units, sizes and generation times are rejected."""
    newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
    valid = {"time_units": "myr", "initial_size": 10000, "generation_time": 5}

    def parse_with(**override):
        # Start from the known-good arguments and replace just one of them.
        return species_trees.parse_species_tree(newick, **{**valid, **override})

    for units in (-3, "asdf", ["myr"]):
        with pytest.raises(ValueError):
            parse_with(time_units=units)

    # A positional size of None is expected to be a TypeError rather than
    # a ValueError.
    with pytest.raises(TypeError):
        species_trees.parse_species_tree(newick, None)

    for size in (-3, "x"):
        with pytest.raises(ValueError):
            parse_with(initial_size=size)

    for gen_time in (None, -3, "x"):
        with pytest.raises(ValueError):
            parse_with(generation_time=gen_time)

    # "gen" units are expected to be rejected when a generation time is
    # supplied as well.
    for units in ("gen",):
        with pytest.raises(ValueError):
            parse_with(time_units=units)
def test_bad_tree(self):
    """Check that every malformed newick string raises ValueError."""
    malformed_newicks = (
        # Empty, separator-only and non-newick text.
        "",
        ";",
        "abcd",
        ";;;",
        "___",
        "∞",
        # Unbalanced or empty parentheses.
        "(",
        ")",
        "()",
        "( )",
        "(()())",
        # One closing parenthesis replaced by "]", "(" and "," respectively.
        "((3:0.39,5:0.39]:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        "((3:0.39,5:0.39(:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        "((3:0.39,5:0.39,:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
        # One more closing parenthesis than opening ones.
        "(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
    )
    for newick in malformed_newicks:
        with pytest.raises(ValueError):
            species_trees.parse_species_tree(tree=newick, initial_size=1)
def test_bad_params(self):
    """Check that omitting ``tree`` and/or ``initial_size`` raises TypeError."""
    incomplete_calls = (
        {},
        {"tree": "()"},
        {"initial_size": 1},
    )
    for kwargs in incomplete_calls:
        with pytest.raises(TypeError):
            species_trees.parse_species_tree(**kwargs)
def test_4_species_run(self):
    """Simulate from a parsed four-species tree, sampling by ID and by name."""
    newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
    spec = species_trees.parse_species_tree(
        newick, time_units="myr", initial_size=10000, generation_time=20
    )

    # Take one sample from each population
    one_per_leaf = {pop_id: 1 for pop_id in range(4)}
    ts = msprime.sim_ancestry(samples=one_per_leaf, demography=spec, ploidy=1)
    assert ts.num_trees == 1
    assert ts.num_samples == 4
    assert ts.num_populations == 7
    for index, sample in enumerate(ts.samples()):
        assert ts.node(sample).population == index

    # Leaves keep their newick labels; the remaining populations carry
    # "pop_<id>" names.
    expected_names = [
        "human",
        "chimpanzee",
        "gorilla",
        "orangutan",
        "pop_4",
        "pop_5",
        "pop_6",
    ]
    pops = list(ts.populations())
    for index, name in enumerate(expected_names):
        assert pops[index].metadata["name"] == name

    # Use the population names to get the samples
    ts = msprime.sim_ancestry(samples={"human": 4, "gorilla": 2}, demography=spec)
    assert ts.num_trees == 1
    assert ts.num_samples == 12
    for index, sample in enumerate(ts.samples()):
        # The first 8 sample nodes belong to human (0), the rest to gorilla (2).
        expected_pop = 2 if index >= 8 else 0
        assert ts.node(sample).population == expected_pop

    # Order of keywords is respected
    ts = msprime.sim_ancestry(samples={"gorilla": 2, "human": 4}, demography=spec)
    assert ts.num_trees == 1
    assert ts.num_samples == 12
    for index, sample in enumerate(ts.samples()):
        # Gorilla (2) now comes first with 4 sample nodes, then human (0).
        expected_pop = 0 if index >= 4 else 2
        assert ts.node(sample).population == expected_pop
def test_4_species_parse(self):
    """Check a four-species tree parses to 7 populations and 3 split events."""
    newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
    demography = species_trees.parse_species_tree(
        newick,
        time_units="myr",
        initial_size=10000,
        generation_time=20,
    )

    # Four leaves plus three internal nodes.
    populations = demography.populations
    assert isinstance(populations, list)
    assert len(populations) == 7
    assert all(
        isinstance(population, msprime.demography.Population)
        for population in populations
    )

    # One split event per internal node.
    events = demography.events
    assert isinstance(events, list)
    assert len(events) == 3
    assert all(
        isinstance(event, msprime.demography.PopulationSplit) for event in events
    )
def test_duplicate_name(self):
    """Check that two leaves sharing a label are reported as a duplicate name."""
    newick = "(popA:100.0,popA:100.0)"
    expect_duplicate_error = pytest.raises(
        ValueError, match="Duplicate population name"
    )
    with expect_duplicate_error:
        species_trees.parse_species_tree(tree=newick, initial_size=1)
def test_unequal_branch_lengths(self):
    """Check that sibling leaves with different branch lengths raise ValueError."""
    # popA and popB share a parent but sit 100.0 and 10.0 below it.
    newick = "(popA:100.0,popB:10.0)"
    with pytest.raises(ValueError):
        species_trees.parse_species_tree(tree=newick, initial_size=1000)
def verify_non_ultrametric(self, tree):
    """Check that parsing the newick form of ``tree`` raises ValueError."""
    newick_text = tree.newick()
    expect_rejection = pytest.raises(ValueError)
    with expect_rejection:
        species_trees.parse_species_tree(newick_text, initial_size=1)