def verify(
        self,
        tree,
        newick=None,
        initial_size=1,
        branch_length_units="gen",
        generation_time=None,
    ):
        """
        Parse a species tree and check the resulting demography against ``tree``.

        When ``newick`` is not given it is produced by ``self.make_newick(tree)``.
        ``initial_size``, ``branch_length_units`` and ``generation_time`` are
        forwarded unchanged to ``species_trees.parse_species_tree``.
        """
        if newick is None:
            newick = self.make_newick(tree)
        demography = species_trees.parse_species_tree(
            newick,
            initial_size=initial_size,
            branch_length_units=branch_length_units,
            generation_time=generation_time,
        )
        assert demography.num_populations == tree.num_nodes
        for population in demography.populations:
            assert population.initial_size == initial_size
            assert population.growth_rate == 0
            assert population.name is not None

        # Leaves receive the first population IDs, in the order tree.leaves()
        # yields them; internal nodes are numbered afterwards in postorder.
        leaf_nodes = list(tree.leaves())
        pop_id_map = {leaf: pop_id for pop_id, leaf in enumerate(leaf_nodes)}
        internal_nodes = [
            node for node in tree.nodes(order="postorder") if tree.is_internal(node)
        ]
        for pop_id, node in enumerate(internal_nodes, start=len(leaf_nodes)):
            pop_id_map[node] = pop_id

        for u in tree.nodes():
            population = demography.populations[pop_id_map[u]]
            assert population.growth_rate == 0
            if tree.is_leaf(u):
                # Leaf names follow the node_<id> pattern; this assumes the
                # newick string came from make_newick.
                assert population.name == f"node_{u}"

        # Walking the nodes in increasing time order, each child of a node
        # must match the next event in the list: a mass migration at the
        # parent's time moving the child's population into the parent's.
        num_checked = 0
        for parent in tree.nodes(order="timeasc"):
            offspring = tree.children(parent)
            dest = pop_id_map[parent]
            for child in offspring:
                event = demography.events[num_checked]
                num_checked += 1
                assert isinstance(event, msprime.MassMigration)
                assert event.time == pytest.approx(tree.time(parent))
                assert event.source == pop_id_map[child]
                assert event.dest == dest

        # No events may be left over once every parent/child pair is matched.
        assert num_checked == len(demography.events)
Esempio n. 2
0
    def test_bad_parameter(self):
        """
        Check that invalid ``time_units``, ``initial_size`` and
        ``generation_time`` values make ``parse_species_tree`` raise.
        """
        good_tree = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
        good_kwargs = {
            "time_units": "myr",
            "initial_size": 10000,
            "generation_time": 5,
        }

        def expect_value_error(**overrides):
            # Replace only the named argument(s); everything else stays valid.
            with pytest.raises(ValueError):
                species_trees.parse_species_tree(
                    good_tree, **{**good_kwargs, **overrides}
                )

        for bad_time_units in [-3, "asdf", ["myr"]]:
            expect_value_error(time_units=bad_time_units)

        # Passing None as the second positional argument is expected to be a
        # TypeError rather than a ValueError.
        with pytest.raises(TypeError):
            species_trees.parse_species_tree(good_tree, None)

        for bad_ne in [-3, "x"]:
            expect_value_error(initial_size=bad_ne)

        for bad_generation_time in [None, -3, "x"]:
            expect_value_error(generation_time=bad_generation_time)

        # time_units="gen" together with an explicit generation_time is also
        # expected to be rejected.
        for bad_time_units in ["gen"]:
            expect_value_error(time_units=bad_time_units)
Esempio n. 3
0
 def test_bad_tree(self):
     """Check that each malformed tree string is rejected with ValueError."""
     malformed_trees = (
         # Empty input and strings without any tree structure.
         "",
         ";",
         "abcd",
         ";;;",
         "___",
         "∞",
         # Unbalanced or empty parentheses.
         "(",
         ")",
         "()",
         "( )",
         "(()())",
         # Otherwise plausible trees with a broken or missing delimiter.
         "((3:0.39,5:0.39]:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
         "((3:0.39,5:0.39(:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
         "((3:0.39,5:0.39,:1.39,(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
         "(4:0.47,(1:0.18,2:0.18):0.29):1.31);",
     )
     for candidate in malformed_trees:
         with pytest.raises(ValueError):
             species_trees.parse_species_tree(tree=candidate, initial_size=1)
Esempio n. 4
0
 def test_bad_params(self):
     """
     Check that calls lacking ``tree``, ``initial_size`` or both raise
     TypeError.
     """
     incomplete_calls = [
         {},
         {"tree": "()"},
         {"initial_size": 1},
     ]
     for kwargs in incomplete_calls:
         with pytest.raises(TypeError):
             species_trees.parse_species_tree(**kwargs)
Esempio n. 5
0
    def test_4_species_run(self):
        """
        Run ancestry simulations on a demography parsed from a four-species
        tree, choosing sample populations by ID and by name.
        """
        species_tree = (
            "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
        )
        spec = species_trees.parse_species_tree(
            species_tree,
            time_units="myr",
            initial_size=10000,
            generation_time=20,
        )

        # One haploid sample from each of the populations 0..3.
        one_per_population = {pop_id: 1 for pop_id in range(4)}
        ts = msprime.sim_ancestry(
            samples=one_per_population, demography=spec, ploidy=1
        )

        assert ts.num_trees == 1
        assert ts.num_samples == 4
        assert ts.num_populations == 7
        for position, sample in enumerate(ts.samples()):
            assert ts.node(sample).population == position

        # The first four populations carry the species names from the tree;
        # the remaining three are expected to be named pop_<id>.
        expected_names = [
            "human",
            "chimpanzee",
            "gorilla",
            "orangutan",
            "pop_4",
            "pop_5",
            "pop_6",
        ]
        pops = list(ts.populations())
        for pop_id, expected_name in enumerate(expected_names):
            assert pops[pop_id].metadata["name"] == expected_name

        # Samples keyed by population name: the 4 human and 2 gorilla
        # individuals are expected to give 12 sample nodes, human ones first.
        by_name = {"human": 4, "gorilla": 2}
        ts = msprime.sim_ancestry(samples=by_name, demography=spec)
        assert ts.num_trees == 1
        assert ts.num_samples == 12
        for position, sample in enumerate(ts.samples()):
            expected_pop = 2 if position >= 8 else 0
            assert ts.node(sample).population == expected_pop

        # Reversing the key order reverses the sample order as well.
        by_name_reversed = {"gorilla": 2, "human": 4}
        ts = msprime.sim_ancestry(samples=by_name_reversed, demography=spec)
        assert ts.num_trees == 1
        assert ts.num_samples == 12
        for position, sample in enumerate(ts.samples()):
            expected_pop = 0 if position >= 4 else 2
            assert ts.node(sample).population == expected_pop
Esempio n. 6
0
 def test_4_species_parse(self):
     """
     Check the types and counts of the populations and events produced for
     a four-species tree.
     """
     newick = "(((human:5.6,chimpanzee:5.6):3.0,gorilla:8.6):9.4,orangutan:18.0)"
     demography = species_trees.parse_species_tree(
         newick,
         time_units="myr",
         initial_size=10000,
         generation_time=20,
     )

     # Seven populations are expected for this four-leaf tree.
     assert isinstance(demography.populations, list)
     assert len(demography.populations) == 7
     assert all(
         isinstance(population, msprime.demography.Population)
         for population in demography.populations
     )

     # Three events are expected, each of them a population split.
     assert isinstance(demography.events, list)
     assert len(demography.events) == 3
     assert all(
         isinstance(event, msprime.demography.PopulationSplit)
         for event in demography.events
     )
Esempio n. 7
0
 def test_duplicate_name(self):
     """
     Check that a tree with two leaves named ``popA`` raises a ValueError
     whose message matches "Duplicate population name".
     """
     duplicated_leaves = "(popA:100.0,popA:100.0)"
     expected_error = pytest.raises(ValueError, match="Duplicate population name")
     with expected_error:
         species_trees.parse_species_tree(tree=duplicated_leaves, initial_size=1)
Esempio n. 8
0
 def test_unequal_branch_lengths(self):
     """
     Check that sibling leaves with different branch lengths (100.0 and
     10.0) are rejected with ValueError.
     """
     uneven_tree = "(popA:100.0,popB:10.0)"
     with pytest.raises(ValueError):
         species_trees.parse_species_tree(tree=uneven_tree, initial_size=1000)
Esempio n. 9
0
 def verify_non_ultrametric(self, tree):
     """
     Check that parsing the newick form of ``tree`` raises ValueError.
     """
     # Build the newick string outside the raises-context so that only the
     # parser's own ValueError can satisfy the expectation.
     newick_text = tree.newick()
     expected_error = pytest.raises(ValueError)
     with expected_error:
         species_trees.parse_species_tree(newick_text, initial_size=1)