Esempio n. 1
0
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        num_individuals_per_population=4,
        num_gene_trees=5,
        rng=None):
    """
    Build a list of contained (gene) trees embedded in ``containing_tree``.

    For each taxon in ``containing_tree.taxon_namespace``,
    ``num_individuals_per_population`` contained taxa are required from
    ``contained_taxon_namespace``, labelled ``<sp>_<ind>^<sp>_<ind>`` with
    ``<ind>`` counted from 1. Each contained taxon gets a
    ``population_label`` attribute holding the containing taxon's label.

    Parameters
    ----------
    containing_tree
        Tree whose ``taxon_namespace`` supplies the containing taxa; the
        namespace must not be empty.
    contained_taxon_namespace
        Namespace that receives the contained taxa. A fresh
        ``dendropy.TaxonNamespace`` is created when this is ``None``.
    population_size
        Forwarded as ``default_pop_size`` to ``embed_contained_kingman``.
    num_individuals_per_population
        Number of contained taxa created per containing taxon.
    num_gene_trees
        Number of calls made to ``embed_contained_kingman``.
    rng
        Forwarded unchanged to ``embed_contained_kingman``.

    Returns
    -------
    A ``dendropy.TreeList`` on ``contained_taxon_namespace`` holding the
    result of each ``embed_contained_kingman`` call, in call order.

    Raises
    ------
    AssertionError
        If ``containing_tree.taxon_namespace`` is empty.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    # Map every contained taxon to the containing taxon it belongs to.
    gene_to_species = {}
    for species_taxon in containing_tree.taxon_namespace:
        for individual_number in range(1, num_individuals_per_population + 1):
            gene_taxon = contained_taxon_namespace.require_taxon(
                    label="{sp}_{ind}^{sp}_{ind}".format(
                        sp=species_taxon.label,
                        ind=individual_number))
            gene_taxon.population_label = species_taxon.label
            gene_to_species[gene_taxon] = species_taxon

    embedder = reconcile.ContainingTree(
            containing_tree=containing_tree,
            contained_taxon_namespace=contained_taxon_namespace,
            contained_to_containing_taxon_map=gene_to_species)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        embedded_tree = embedder.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng)
        gene_trees.append(embedded_tree)
    return gene_trees
Esempio n. 2
0
def generate_contained_trees(
        containing_tree,
        contained_taxon_namespace=None,
        population_size=1,
        total_number_of_individuals=200,
        num_gene_trees=5,
        rng=None):
    """
    Build contained (gene) trees embedded in a processed containing tree.

    ``containing_tree`` is first passed through
    ``process_containing_tree_for_gene_samples`` and replaced by its result.
    For each leaf node of that tree, ``num_individuals_sampled`` contained
    taxa are required from ``contained_taxon_namespace``, labelled
    ``<sp>_<ind>^<sp>`` with ``<ind>`` counted from 1. Each contained taxon
    gets a ``population_label`` attribute holding the leaf taxon's label.

    Parameters
    ----------
    containing_tree
        Tree whose ``taxon_namespace`` must not be empty.
    contained_taxon_namespace
        Namespace that receives the contained taxa. A fresh
        ``dendropy.TaxonNamespace`` is created when this is ``None``.
    population_size
        Forwarded as ``default_pop_size`` to ``embed_contained_kingman``.
    total_number_of_individuals
        Forwarded to ``process_containing_tree_for_gene_samples``.
    num_gene_trees
        Number of calls made to ``embed_contained_kingman``.
    rng
        Forwarded unchanged to both the processing helper and
        ``embed_contained_kingman``.

    Returns
    -------
    A ``(containing_tree, gene_trees)`` pair: the tree returned by the
    processing helper, and a ``dendropy.TreeList`` holding the result of
    each ``embed_contained_kingman`` call, in call order.

    Raises
    ------
    AssertionError
        If ``containing_tree.taxon_namespace`` is empty.
    """
    if contained_taxon_namespace is None:
        contained_taxon_namespace = dendropy.TaxonNamespace()
    assert len(containing_tree.taxon_namespace) > 0

    containing_tree = process_containing_tree_for_gene_samples(
            containing_tree=containing_tree,
            total_number_of_individuals=total_number_of_individuals,
            rng=rng)

    # Map every contained taxon to the leaf taxon it was sampled from.
    # NOTE(review): `num_individuals_sampled` is presumably set on each leaf
    # by the processing helper above -- confirm against its implementation.
    gene_to_species = {}
    for leaf in containing_tree.leaf_nodes():
        species_taxon = leaf.taxon
        for individual_number in range(1, leaf.num_individuals_sampled + 1):
            gene_taxon = contained_taxon_namespace.require_taxon(
                    label="{sp}_{ind}^{sp}".format(
                        sp=species_taxon.label,
                        ind=individual_number))
            gene_taxon.population_label = species_taxon.label
            gene_to_species[gene_taxon] = species_taxon

    embedder = reconcile.ContainingTree(
            containing_tree=containing_tree,
            contained_taxon_namespace=contained_taxon_namespace,
            contained_to_containing_taxon_map=gene_to_species)

    gene_trees = dendropy.TreeList(taxon_namespace=contained_taxon_namespace)
    for _ in range(num_gene_trees):
        embedded_tree = embedder.embed_contained_kingman(
                default_pop_size=population_size,
                rng=rng)
        gene_trees.append(embedded_tree)
    return containing_tree, gene_trees
Esempio n. 3
0
 def testFittedEdgesDeepCoalCount(self):
     """
     Build a ContainingTree with fitted edge lengths for each gene tree.

     Every tree in ``self.gene_trees`` has its bipartitions encoded, is
     embedded on its own in ``self.species_tree`` with
     ``fit_containing_edge_lengths=True``, and has its deep-coalescence
     count computed. The count is not compared against anything here.
     """
     for gene_tree in self.gene_trees:
         gene_tree.encode_bipartitions()
         fitted = reconcile.ContainingTree(
             containing_tree=self.species_tree,
             contained_taxon_namespace=self.gene_trees.taxon_namespace,
             contained_to_containing_taxon_map=(
                 self.gene_taxon_to_population_taxon_map),
             contained_trees=[gene_tree],
             fit_containing_edge_lengths=True,
         )
         # Computed for its own sake; the result is not asserted on.
         fitted.num_deep_coalescences()
Esempio n. 4
0
    def testFixedEdgesDeepCoalCount(self):
        """
        Check deep-coalescence counts with the containing edges left fixed.

        Every tree in ``self.gene_trees`` is embedded on its own in
        ``self.species_tree`` with ``fit_containing_edge_lengths=False``.
        The per-tree counts, in gene-tree order, must equal
        ``self.expected_under_original_brlens``.
        """
        observed_counts = []
        for gene_tree in self.gene_trees:
            fixed = reconcile.ContainingTree(
                containing_tree=self.species_tree,
                contained_taxon_namespace=self.gene_trees.taxon_namespace,
                contained_to_containing_taxon_map=(
                    self.gene_taxon_to_population_taxon_map),
                contained_trees=[gene_tree],
                fit_containing_edge_lengths=False,
            )
            # NOTE(review): earlier debugging notes dumped each containing
            # tree via `write_as_mesquite(stream)`; restore if needed.
            observed_counts.append(fixed.num_deep_coalescences())

        self.assertEqual(observed_counts, self.expected_under_original_brlens)
        data=stepwise_tree_str,
        schema="newick",
        taxon_namespace=containing_taxa)
# Parse the second containing tree from its Newick string, using the shared
# `containing_taxa` namespace.
frag_tree = dendropy.Tree.get(
        data=frag_tree_str,
        schema="newick",
        taxon_namespace=containing_taxa)

# Taxon set association: build the mapping between the contained (gene) taxa
# and the taxa in `containing_taxa`.
# NOTE(review): `num_contained=8` presumably means eight contained taxa per
# containing taxon -- confirm against the DendroPy documentation.
genes_to_species = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
        containing_taxon_namespace=containing_taxa,
        num_contained=8)

# Convert to containing trees. Each name is rebound from the plain tree to a
# `reconcile.ContainingTree` built from it; both use the same contained
# namespace (the mapping's domain) and the same gene-to-species mapping.
stepwise_tree = reconcile.ContainingTree(stepwise_tree,
            contained_taxon_namespace=genes_to_species.domain_taxon_namespace,
            contained_to_containing_taxon_map=genes_to_species)
frag_tree = reconcile.ContainingTree(frag_tree,
            contained_taxon_namespace=genes_to_species.domain_taxon_namespace,
            contained_to_containing_taxon_map=genes_to_species)

# For each replicate: generate one gene tree per containing tree with
# `treesim.contained_coalescent_tree` (default population size 40000) and
# embed it in the containing tree it was generated from.
for rep in range(num_reps):
    gene_tree1 = treesim.contained_coalescent_tree(containing_tree=stepwise_tree,
        gene_to_containing_taxon_map=genes_to_species,
        default_pop_size=40000)
    stepwise_tree.embed_tree(gene_tree1)
    gene_tree2 = treesim.contained_coalescent_tree(containing_tree=frag_tree,
        gene_to_containing_taxon_map=genes_to_species,
        default_pop_size=40000)
    frag_tree.embed_tree(gene_tree2)