Esempio n. 1
0
def pure_kingman(taxon_set, pop_size=1, rng=None):
    """
    Generates a tree under the unconstrained Kingman's coalescent process.

    `taxon_set` is iterated to create one leaf node per taxon, and is also
    assigned as the taxon set of the returned tree.

    `pop_size` is passed unchanged to `coalescent.coalesce()`.

    `rng` is the random number generator handed to `coalescent.coalesce()`.
    If None, `GLOBAL_RNG` is used.

    Returns a `dataobject.Tree` whose seed node is the first node returned by
    `coalescent.coalesce()` when run with no time limit (`period=None`).
    """

    # get our random number generator
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    nodes = [dataobject.Node(taxon=t) for t in taxon_set]
    # `use_expected_tmrca` is intentionally left at its default: requesting
    # the expected coalescent intervals is the job of `mean_kingman()`, and
    # passing it here made the two functions indistinguishable.
    seed_node = coalescent.coalesce(nodes=nodes, pop_size=pop_size, period=None, rng=rng)[0]
    tree = dataobject.Tree(taxon_set=taxon_set, seed_node=seed_node)
    return tree
Esempio n. 2
0
def mean_kingman(taxon_set, pop_size=1, rng=None):
    """
    Returns a tree with coalescent intervals given by the expected times under
    Kingman's neutral coalescent.

    `taxon_set` is iterated to create one leaf node per taxon, and is also
    assigned as the taxon set of the returned tree.

    `pop_size` is passed unchanged to `coalescent.coalesce()`.

    `rng` is the random number generator forwarded to `coalescent.coalesce()`.
    If None (the default), `GLOBAL_RNG` is used. This parameter is optional,
    so existing two-argument calls keep working.
    """

    # `rng` has to be a parameter: the body assigns to it, so without the
    # parameter the `is None` test below reads an unbound local and raises
    # UnboundLocalError on every call.
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    nodes = [dataobject.Node(taxon=t) for t in taxon_set]
    seed_node = coalescent.coalesce(nodes=nodes, pop_size=pop_size, period=None, rng=rng, use_expected_tmrca=True)[0]
    tree = dataobject.Tree(taxon_set=taxon_set, seed_node=seed_node)
    return tree
Esempio n. 3
0
def pure_kingman(taxon_set, pop_size=1, rng=None):
    """
    Simulates a genealogy for the given taxa under the unconstrained
    Kingman's coalescent process.

    One leaf node is created per taxon in `taxon_set`; the leaves are then
    passed to `coalescent.coalesce()` with no time limit (`period=None`),
    together with `pop_size` and the random number generator. `rng` falls
    back to `GLOBAL_RNG` when it is None.

    Returns a `dataobject.Tree` over `taxon_set`, seeded with the first node
    returned by the coalescent run.
    """
    generator = GLOBAL_RNG if rng is None else rng

    leaves = []
    for taxon in taxon_set:
        leaves.append(dataobject.Node(taxon=taxon))

    survivors = coalescent.coalesce(
        nodes=leaves,
        pop_size=pop_size,
        period=None,
        rng=generator,
    )
    return dataobject.Tree(taxon_set=taxon_set, seed_node=survivors[0])
Esempio n. 4
0
def mean_kingman(taxon_set, pop_size=1, rng=None):
    """
    Returns a tree with coalescent intervals given by the expected times under
    Kingman's neutral coalescent.

    `taxon_set` is iterated to create one leaf node per taxon, and is also
    assigned as the taxon set of the returned tree.

    `pop_size` is passed unchanged to `coalescent.coalesce()`.

    `rng` is the random number generator forwarded to `coalescent.coalesce()`.
    If None (the default), `GLOBAL_RNG` is used. This parameter is optional,
    so existing two-argument calls keep working.
    """

    # `rng` has to be a parameter: the body assigns to it, so without the
    # parameter the `is None` test below reads an unbound local and raises
    # UnboundLocalError on every call.
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    nodes = [dataobject.Node(taxon=t) for t in taxon_set]
    seed_node = coalescent.coalesce(nodes=nodes,
                                    pop_size=pop_size,
                                    period=None,
                                    rng=rng,
                                    use_expected_tmrca=True)[0]
    tree = dataobject.Tree(taxon_set=taxon_set, seed_node=seed_node)
    return tree
Esempio n. 5
0
def constrained_kingman(
    pop_tree,
    gene_tree_list=None,
    rng=None,
    gene_node_label_func=None,
    num_genes_attr="num_genes",
    pop_size_attr="pop_size",
    decorate_original_tree=False,
):
    """
    Given a population tree, `pop_tree`, this will return a *pair of
    trees*: a gene tree simulated on this population tree based on
    Kingman's n-coalescent, and a population tree with the additional
    attribute 'gene_nodes' on each node, which is a list of
    uncoalesced nodes from the gene tree associated with the given
    node from the population tree.

    `pop_tree` should be a DendroPy Tree object or an object of a class
    derived from this. Each of its leaf nodes must carry the attribute named
    by `num_genes_attr` (by default `num_genes`) -- the number of gene
    samples from that population in the present.

    `pop_size_attr` is the attribute name of the edges of `pop_tree` that
    specify the population size. By default it is `pop_size`. This should
    specify the effective *haploid* population size; i.e., number of genes
    in the population: 2 * N in a diploid population of N individuals,
    or N in a haploid population of N individuals. An edge without this
    attribute is simulated with a population size of 1. The same lookup is
    applied to the root edge for the final, unconstrained coalescence.

    If the population size is 1 or 0 or None, then the edge lengths of
    `pop_tree` are taken to be in haploid population units; i.e. where 1 unit
    equals 2N generations for a diploid population of size N, or N
    generations for a haploid population of size N. Otherwise the edge
    lengths of `pop_tree` are taken to be in generations.

    If `gene_tree_list` is given, then the gene tree is appended to it, and
    its `taxon_set` is used to manage the gene tree's taxa. Otherwise a new
    `dataobject.TaxonSet` is created.

    `gene_node_label_func` is a function that takes two arguments (a string
    and an integer, respectively, where the string is the containing species
    taxon label and the integer is the 1-based gene index) and returns a
    label for the corresponding gene node. The default produces
    "<label>_<two-digit index>".

    If `decorate_original_tree` is True, then the list of uncoalesced nodes at
    each node of the population tree is added to the original (input)
    population tree instead of a copy.

    `rng` is the random number generator; `GLOBAL_RNG` is used if None.

    Note that this function does very much the same thing as
    `contained_coalescent()`, but provides a very different API.
    """

    # get our random number generator
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    if gene_tree_list is not None:
        gtaxa = gene_tree_list.taxon_set
    else:
        gtaxa = dataobject.TaxonSet()

    if gene_node_label_func is None:

        def gene_node_label_func(species_label, gene_index):
            return "%s_%02d" % (species_label, gene_index)

    # We create a set of gene nodes for each leaf node on the population
    # tree, and associate those gene nodes to the leaf by assignment
    # of 'taxon'.
    # NOTE: this decorates the leaves of the *input* tree in every case,
    # because the working copy below is only made afterwards.
    for leaf in pop_tree.leaf_iter():
        gene_nodes = []
        for gene_count in range(getattr(leaf, num_genes_attr)):
            gene_node = dataobject.Node()
            gene_node.taxon = gtaxa.require_taxon(label=gene_node_label_func(leaf.taxon.label, gene_count + 1))
            gene_nodes.append(gene_node)
        leaf.gene_nodes = gene_nodes

    # We iterate through the edges of the population tree in post-order,
    # i.e., visiting child edges before we visit parent edges. For
    # each edge visited, we take the genes found in the child nodes,
    # and run the coalescent simulation on them, bounded by the length
    # of the edge. Any genes that have not yet coalesced at the end of
    # this period are added to the genes of the tail (parent) node of
    # the edge.

    if decorate_original_tree:
        working_poptree = pop_tree
    else:
        # start with a new copy of the population tree so that the
        # internal-node decoration does not touch the original tree
        working_poptree = dataobject.Tree(pop_tree)

    # start with a new tree
    gene_tree = dataobject.Tree()
    gene_tree.taxon_set = gtaxa
    for edge in working_poptree.postorder_edge_iter():

        # Resolve the population size for *every* edge, the root included.
        # Previously the root branch read `pop_size` before assigning it, so
        # it silently reused the value of the last non-root edge visited, or
        # raised UnboundLocalError when the tree consisted of a root only.
        if hasattr(edge, pop_size_attr):
            pop_size = getattr(edge, pop_size_attr)
        else:
            # this means all our time will be in population units
            pop_size = 1

        head_genes = edge.head_node.gene_nodes

        # if mrca root, run unconstrained coalescent
        if edge.head_node.parent_node is None:
            if len(head_genes) > 1:
                final = coalescent.coalesce(nodes=head_genes, pop_size=pop_size, period=None, rng=rng)
            else:
                final = head_genes
            gene_tree.seed_node = final[0]
        else:
            uncoal = coalescent.coalesce(nodes=head_genes, pop_size=pop_size, period=edge.length, rng=rng)
            if not hasattr(edge.tail_node, "gene_nodes"):
                edge.tail_node.gene_nodes = []
            edge.tail_node.gene_nodes.extend(uncoal)

    gene_tree.is_rooted = True
    if gene_tree_list is not None:
        gene_tree_list.append(gene_tree)
    return gene_tree, working_poptree
Esempio n. 6
0
def contained_coalescent(
    containing_tree, gene_to_containing_taxon_map, edge_pop_size_attr="pop_size", default_pop_size=1, rng=None
):
    """
    Returns a gene tree simulated under the coalescent contained within a
    population or species tree.

        `containing_tree`
            The population or species tree. If `edge_pop_size_attr` is not
            None, and population sizes given are non-trivial (i.e., >1), then
            edge lengths on this tree are in units of generations. Otherwise
            edge lengths are in population units; i.e. 2N generations for
            diploid populations of size N, or N generations for haploid
            populations of size N.

        `gene_to_containing_taxon_map`
            A TaxonSetMapping object mapping Taxon objects in the
            `containing_tree` TaxonSet to corresponding Taxon objects in the
            resulting gene tree. Its `reverse` attribute is used to look up
            the gene taxa of each containing-tree taxon, and its
            `domain_taxon_set` attribute supplies the gene tree's taxon set.
            If `domain_taxon_set` is None, a new taxon set is built from all
            gene taxa found in `reverse`.

        `edge_pop_size_attr`
            Name of attribute of edges that specify population size. By default
            this is "pop_size". If this attribute does not exist,
            `default_pop_size` will be used.  The value for this attribute
            should be the haploid population size or the number of genes;
            i.e.  2N for a diploid population of N individuals, or N for a
            haploid population of N individuals. This value determines how
            branch length units are interpreted in the input tree,
            `containing_tree`.  If a biologically-meaningful value, then branch
            lengths on the `containing_tree` are properly read as generations.
            If not (e.g. 1 or 0), then they are in population units, i.e. where
            1 unit of time equals 2N generations for a diploid population of
            size N, or N generations for a haploid population of size N.
            If this argument is None, then population sizes default to
            `default_pop_size`.

        `default_pop_size`
            Population size to use if `edge_pop_size_attr` is None or
            if an edge does not have the attribute. Defaults to 1. It is
            also the size used for the final coalescence at the root.

        `rng`
            Random number generator to use. If None, `GLOBAL_RNG` is used.

    The returned gene tree will have the following extra attributes:

        `pop_node_genes`
            A dictionary with nodes of `containing_tree` as keys and a list of gene
            tree nodes that are uncoalesced as values.

    Note that this function does very much the same thing as
    `constrained_kingman()`, but provides a very different API.
    """

    if rng is None:
        rng = GLOBAL_RNG

    # containing-tree taxon -> collection of gene taxa sampled from it
    pop_gene_taxa = gene_to_containing_taxon_map.reverse

    gene_tree_taxon_set = gene_to_containing_taxon_map.domain_taxon_set
    if gene_tree_taxon_set is None:
        # No taxon set supplied by the mapping: collect every mapped gene
        # taxon into a fresh one. (This branch used to iterate over an
        # undefined name and therefore always raised NameError.)
        gene_tree_taxon_set = dataobject.TaxonSet()
        for gene_taxa in pop_gene_taxa.values():
            for taxon in gene_taxa:
                gene_tree_taxon_set.add(taxon)
    gene_tree = dataobject.Tree(taxon_set=gene_tree_taxon_set)
    gene_tree.is_rooted = True

    # Seed every containing-tree node that has mapped gene taxa with one
    # fresh gene node per gene taxon.
    pop_node_genes = {}
    for nd in containing_tree.postorder_node_iter():
        if nd.taxon and nd.taxon in pop_gene_taxa:
            pop_node_genes[nd] = []
            gene_taxa = pop_gene_taxa[nd.taxon]
            for gene_taxon in gene_taxa:
                gene_node = dataobject.Node()
                gene_node.taxon = gene_taxon
                pop_node_genes[nd].append(gene_node)

    # Post-order guarantees that all child edges have pushed their
    # uncoalesced lineages into a node before that node's own edge is run.
    for edge in containing_tree.postorder_edge_iter():

        if edge_pop_size_attr and hasattr(edge, edge_pop_size_attr):
            pop_size = getattr(edge, edge_pop_size_attr)
        else:
            pop_size = default_pop_size
        if edge.head_node.parent_node is None:
            # Root: coalesce without a time limit until one lineage is left.
            # NOTE(review): the root uses `default_pop_size` rather than the
            # root edge's own `pop_size` -- kept as-is; confirm it is intended.
            if len(pop_node_genes[edge.head_node]) > 1:
                final = coalescent.coalesce(
                    nodes=pop_node_genes[edge.head_node], pop_size=default_pop_size, period=None, rng=rng
                )
            else:
                final = pop_node_genes[edge.head_node]
            gene_tree.seed_node = final[0]
        else:
            # Coalesce for the duration of this edge, then hand whatever
            # has not coalesced to the parent node.
            uncoal = coalescent.coalesce(
                nodes=pop_node_genes[edge.head_node], pop_size=pop_size, period=edge.length, rng=rng
            )
            if edge.tail_node not in pop_node_genes:
                pop_node_genes[edge.tail_node] = []
            pop_node_genes[edge.tail_node].extend(uncoal)

    gene_tree.pop_node_genes = pop_node_genes
    return gene_tree
Esempio n. 7
0
def constrained_kingman(pop_tree,
                        gene_tree_list=None,
                        rng=None,
                        gene_node_label_func=None,
                        num_genes_attr='num_genes',
                        pop_size_attr='pop_size',
                        decorate_original_tree=False):
    """
    Given a population tree, `pop_tree`, this will return a *pair of
    trees*: a gene tree simulated on this population tree based on
    Kingman's n-coalescent, and a population tree with the additional
    attribute 'gene_nodes' on each node, which is a list of
    uncoalesced nodes from the gene tree associated with the given
    node from the population tree.

    `pop_tree` should be a DendroPy Tree object or an object of a class
    derived from this. Each of its leaf nodes must carry the attribute named
    by `num_genes_attr` (by default `num_genes`) -- the number of gene
    samples from that population in the present.

    `pop_size_attr` is the attribute name of the edges of `pop_tree` that
    specify the population size. By default it is `pop_size`. This should
    specify the effective *haploid* population size; i.e., number of genes
    in the population: 2 * N in a diploid population of N individuals,
    or N in a haploid population of N individuals. An edge without this
    attribute is simulated with a population size of 1. The same lookup is
    applied to the root edge for the final, unconstrained coalescence.

    If the population size is 1 or 0 or None, then the edge lengths of
    `pop_tree` are taken to be in haploid population units; i.e. where 1 unit
    equals 2N generations for a diploid population of size N, or N
    generations for a haploid population of size N. Otherwise the edge
    lengths of `pop_tree` are taken to be in generations.

    If `gene_tree_list` is given, then the gene tree is appended to it, and
    its `taxon_set` is used to manage the gene tree's taxa. Otherwise a new
    `dataobject.TaxonSet` is created.

    `gene_node_label_func` is a function that takes two arguments (a string
    and an integer, respectively, where the string is the containing species
    taxon label and the integer is the 1-based gene index) and returns a
    label for the corresponding gene node. The default produces
    "<label>_<two-digit index>".

    If `decorate_original_tree` is True, then the list of uncoalesced nodes at
    each node of the population tree is added to the original (input)
    population tree instead of a deep copy.

    `rng` is the random number generator; `GLOBAL_RNG` is used if None.

    Note that this function does very much the same thing as
    `contained_coalescent()`, but provides a very different API.
    """

    # get our random number generator
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    if gene_tree_list is not None:
        gtaxa = gene_tree_list.taxon_set
    else:
        gtaxa = dataobject.TaxonSet()

    if gene_node_label_func is None:

        def gene_node_label_func(species_label, gene_index):
            return "%s_%02d" % (species_label, gene_index)

    # We create a set of gene nodes for each leaf node on the population
    # tree, and associate those gene nodes to the leaf by assignment
    # of 'taxon'.
    # NOTE: this decorates the leaves of the *input* tree in every case,
    # because the working copy below is only made afterwards.
    for leaf in pop_tree.leaf_iter():
        gene_nodes = []
        for gene_count in range(getattr(leaf, num_genes_attr)):
            gene_node = dataobject.Node()
            gene_node.taxon = gtaxa.require_taxon(
                label=gene_node_label_func(leaf.taxon.label, gene_count + 1))
            gene_nodes.append(gene_node)
        leaf.gene_nodes = gene_nodes

    # We iterate through the edges of the population tree in post-order,
    # i.e., visiting child edges before we visit parent edges. For
    # each edge visited, we take the genes found in the child nodes,
    # and run the coalescent simulation on them, bounded by the length
    # of the edge. Any genes that have not yet coalesced at the end of
    # this period are added to the genes of the tail (parent) node of
    # the edge.

    if decorate_original_tree:
        working_poptree = pop_tree
    else:
        # start with a new (deep) copy of the population tree so that the
        # internal-node decoration does not touch the original tree
        working_poptree = copy.deepcopy(pop_tree)

    # start with a new tree
    gene_tree = dataobject.Tree()
    gene_tree.taxon_set = gtaxa
    for edge in working_poptree.postorder_edge_iter():

        # Resolve the population size for *every* edge, the root included.
        # Previously the root branch read `pop_size` before assigning it, so
        # it silently reused the value of the last non-root edge visited, or
        # raised UnboundLocalError when the tree consisted of a root only.
        if hasattr(edge, pop_size_attr):
            pop_size = getattr(edge, pop_size_attr)
        else:
            # this means all our time will be in population units
            pop_size = 1

        head_genes = edge.head_node.gene_nodes

        # if mrca root, run unconstrained coalescent
        if edge.head_node.parent_node is None:
            if len(head_genes) > 1:
                final = coalescent.coalesce(nodes=head_genes,
                                            pop_size=pop_size,
                                            period=None,
                                            rng=rng)
            else:
                final = head_genes
            gene_tree.seed_node = final[0]
        else:
            uncoal = coalescent.coalesce(nodes=head_genes,
                                         pop_size=pop_size,
                                         period=edge.length,
                                         rng=rng)
            if not hasattr(edge.tail_node, 'gene_nodes'):
                edge.tail_node.gene_nodes = []
            edge.tail_node.gene_nodes.extend(uncoal)

    gene_tree.is_rooted = True
    if gene_tree_list is not None:
        gene_tree_list.append(gene_tree)
    return gene_tree, working_poptree
Esempio n. 8
0
def contained_coalescent(containing_tree,
                         gene_to_containing_taxon_map,
                         edge_pop_size_attr="pop_size",
                         default_pop_size=1,
                         rng=None):
    """
    Returns a gene tree simulated under the coalescent contained within a
    population or species tree.

        `containing_tree`
            The population or species tree. If `edge_pop_size_attr` is not
            None, and population sizes given are non-trivial (i.e., >1), then
            edge lengths on this tree are in units of generations. Otherwise
            edge lengths are in population units; i.e. 2N generations for
            diploid populations of size N, or N generations for haploid
            populations of size N.

        `gene_to_containing_taxon_map`
            A TaxonSetMapping object mapping Taxon objects in the
            `containing_tree` TaxonSet to corresponding Taxon objects in the
            resulting gene tree. Its `reverse` attribute is used to look up
            the gene taxa of each containing-tree taxon, and its
            `domain_taxon_set` attribute supplies the gene tree's taxon set.
            If `domain_taxon_set` is None, a new taxon set is built from all
            gene taxa found in `reverse`.

        `edge_pop_size_attr`
            Name of attribute of edges that specify population size. By default
            this is "pop_size". If this attribute does not exist,
            `default_pop_size` will be used.  The value for this attribute
            should be the haploid population size or the number of genes;
            i.e.  2N for a diploid population of N individuals, or N for a
            haploid population of N individuals. This value determines how
            branch length units are interpreted in the input tree,
            `containing_tree`.  If a biologically-meaningful value, then branch
            lengths on the `containing_tree` are properly read as generations.
            If not (e.g. 1 or 0), then they are in population units, i.e. where
            1 unit of time equals 2N generations for a diploid population of
            size N, or N generations for a haploid population of size N.
            If this argument is None, then population sizes default to
            `default_pop_size`.

        `default_pop_size`
            Population size to use if `edge_pop_size_attr` is None or
            if an edge does not have the attribute. Defaults to 1. It is
            also the size used for the final coalescence at the root.

        `rng`
            Random number generator to use. If None, `GLOBAL_RNG` is used.

    The returned gene tree will have the following extra attributes:

        `pop_node_genes`
            A dictionary with nodes of `containing_tree` as keys and a list of gene
            tree nodes that are uncoalesced as values.

    Note that this function does very much the same thing as
    `constrained_kingman()`, but provides a very different API.
    """

    if rng is None:
        rng = GLOBAL_RNG

    # containing-tree taxon -> collection of gene taxa sampled from it
    pop_gene_taxa = gene_to_containing_taxon_map.reverse

    gene_tree_taxon_set = gene_to_containing_taxon_map.domain_taxon_set
    if gene_tree_taxon_set is None:
        # No taxon set supplied by the mapping: collect every mapped gene
        # taxon into a fresh one. (This branch used to iterate over an
        # undefined name and therefore always raised NameError.)
        gene_tree_taxon_set = dataobject.TaxonSet()
        for gene_taxa in pop_gene_taxa.values():
            for taxon in gene_taxa:
                gene_tree_taxon_set.add(taxon)
    gene_tree = dataobject.Tree(taxon_set=gene_tree_taxon_set)
    gene_tree.is_rooted = True

    # Seed every containing-tree node that has mapped gene taxa with one
    # fresh gene node per gene taxon.
    pop_node_genes = {}
    for nd in containing_tree.postorder_node_iter():
        if nd.taxon and nd.taxon in pop_gene_taxa:
            pop_node_genes[nd] = []
            gene_taxa = pop_gene_taxa[nd.taxon]
            for gene_taxon in gene_taxa:
                gene_node = dataobject.Node()
                gene_node.taxon = gene_taxon
                pop_node_genes[nd].append(gene_node)

    # Post-order guarantees that all child edges have pushed their
    # uncoalesced lineages into a node before that node's own edge is run.
    for edge in containing_tree.postorder_edge_iter():

        if edge_pop_size_attr and hasattr(edge, edge_pop_size_attr):
            pop_size = getattr(edge, edge_pop_size_attr)
        else:
            pop_size = default_pop_size
        if edge.head_node.parent_node is None:
            # Root: coalesce without a time limit until one lineage is left.
            # NOTE(review): the root uses `default_pop_size` rather than the
            # root edge's own `pop_size` -- kept as-is; confirm it is intended.
            if len(pop_node_genes[edge.head_node]) > 1:
                final = coalescent.coalesce(
                    nodes=pop_node_genes[edge.head_node],
                    pop_size=default_pop_size,
                    period=None,
                    rng=rng)
            else:
                final = pop_node_genes[edge.head_node]
            gene_tree.seed_node = final[0]
        else:
            # Coalesce for the duration of this edge, then hand whatever
            # has not coalesced to the parent node.
            uncoal = coalescent.coalesce(nodes=pop_node_genes[edge.head_node],
                                         pop_size=pop_size,
                                         period=edge.length,
                                         rng=rng)
            if edge.tail_node not in pop_node_genes:
                pop_node_genes[edge.tail_node] = []
            pop_node_genes[edge.tail_node].extend(uncoal)

    gene_tree.pop_node_genes = pop_node_genes
    return gene_tree
Esempio n. 9
0
    def simulate_contained_kingman(self,
            edge_pop_size_attr='pop_size',
            default_pop_size=1,
            label=None,
            rng=None):
        """
        Simulates and returns a "censored" (Kingman) neutral coalescence tree
        conditional on self.

            ``edge_pop_size_attr``
                Name of the attribute of self's edges that gives the
                population size of that edge.

            ``default_pop_size``
                Population size used for any edge that lacks the
                ``edge_pop_size_attr`` attribute.

            ``label``
                Label given to the returned tree.

            ``rng``
                Random number generator, passed as-is to
                ``coalescent.coalesce``.

        The gene lineages at each leaf are taken from that leaf's
        ``edge.embedded_taxa``, so all edge-associated taxon sets must be
        up-to-date (otherwise, ``build_edge_taxa_sets()`` should be called).
        The simulated tree is *not* added to the set of embedded trees; for
        that, call ``embed_contained_kingman``.
        """

        # Gene-tree lineages currently sitting at each node of the containing
        # tree; seeded with one new gene node per embedded taxon at each leaf.
        embedded_nodes = {}
        for leaf in self.leaf_iter():
            embedded_nodes[leaf] = [dataobject.Node(taxon=gene_taxon)
                                    for gene_taxon in leaf.edge.embedded_taxa]

        # Walk the edges children-first, coalescing within each edge and
        # handing the surviving lineages to the parent node.
        for edge in self.postorder_edge_iter():
            head = edge.head_node
            if hasattr(edge, edge_pop_size_attr):
                pop_size = getattr(edge, edge_pop_size_attr)
            else:
                pop_size = default_pop_size
            lineages = embedded_nodes[head]

            if head.parent_node is not None:
                # internal edge: coalescence is limited to the edge length
                remaining = coalescent.coalesce(nodes=lineages,
                        pop_size=pop_size,
                        period=edge.length,
                        rng=rng)
                tail = edge.tail_node
                if tail in embedded_nodes:
                    embedded_nodes[tail].extend(remaining)
                else:
                    embedded_nodes[tail] = remaining
                continue

            # root edge: no time limit, run until a single lineage is left
            if len(lineages) > 1:
                final = coalescent.coalesce(nodes=lineages,
                        pop_size=pop_size,
                        period=None,
                        rng=rng)
            else:
                final = lineages

        # Wrap the surviving root lineage in a rooted tree.
        embedded_tree = dataobject.Tree(taxon_set=self.embedded_taxon_set, label=label)
        embedded_tree.seed_node = final[0]
        embedded_tree.is_rooted = True
        return embedded_tree
Esempio n. 10
0
    def simulate_contained_kingman(self,
                                   edge_pop_size_attr='pop_size',
                                   default_pop_size=1,
                                   label=None,
                                   rng=None):
        """
        Simulates and returns a "censored" (Kingman) neutral coalescence tree
        conditional on self.

            ``edge_pop_size_attr``
                Name of the attribute of self's edges that gives the
                population size of that edge.

            ``default_pop_size``
                Population size used for any edge that lacks the
                ``edge_pop_size_attr`` attribute.

            ``label``
                Label given to the returned tree.

            ``rng``
                Random number generator, passed as-is to
                ``coalescent.coalesce``.

        The gene lineages at each leaf are taken from that leaf's
        ``edge.contained_taxa``, so all edge-associated taxon sets must be
        up-to-date (otherwise, ``build_edge_taxa_sets()`` should be called).
        The simulated tree is *not* added to the set of contained trees; for
        that, call ``embed_contained_kingman``.
        """

        def edge_pop_size(edge):
            # population size of one edge, falling back to the default
            if hasattr(edge, edge_pop_size_attr):
                return getattr(edge, edge_pop_size_attr)
            return default_pop_size

        # Gene-tree lineages currently sitting at each node of the containing
        # tree; seeded with one new gene node per contained taxon at each leaf.
        contained_nodes = {}
        for leaf in self.leaf_iter():
            contained_nodes[leaf] = [dataobject.Node(taxon=gene_taxon)
                                     for gene_taxon in leaf.edge.contained_taxa]

        # Walk the edges children-first, coalescing within each edge and
        # handing the surviving lineages to the parent node.
        for edge in self.postorder_edge_iter():
            at_root = edge.head_node.parent_node is None
            pop_size = edge_pop_size(edge)
            lineages = contained_nodes[edge.head_node]

            if at_root:
                # no time limit: run until a single lineage is left
                if len(lineages) > 1:
                    final = coalescent.coalesce(nodes=lineages,
                                                pop_size=pop_size,
                                                period=None,
                                                rng=rng)
                else:
                    final = lineages
            else:
                # coalescence is limited to the length of this edge
                remaining = coalescent.coalesce(nodes=lineages,
                                                pop_size=pop_size,
                                                period=edge.length,
                                                rng=rng)
                parent = edge.tail_node
                if parent in contained_nodes:
                    contained_nodes[parent].extend(remaining)
                else:
                    contained_nodes[parent] = remaining

        # Wrap the surviving root lineage in a rooted tree.
        contained_tree = dataobject.Tree(taxon_set=self.contained_taxon_set,
                                         label=label)
        contained_tree.seed_node = final[0]
        contained_tree.is_rooted = True
        return contained_tree
Esempio n. 11
0
def constrained_kingman(pop_tree,
                        gene_trees_block=None,
                        node_factory=None,
                        tree_factory=None,
                        rng=None,
                        num_genes_attr='num_genes',
                        pop_size_attr='pop_size'):
    """
    Given a population tree, `pop_tree`, this will return a *pair of
    trees*: a gene tree simulated on this population tree based on
    Kingman's n-coalescent, and a deep copy of the population tree with the
    additional attribute 'gene_nodes' on each node, which is a list of
    uncoalesced nodes from the gene tree associated with the given
    node from the population tree.

    `pop_tree` should be a DendroPy Tree object or an object
    of a class derived from this. Each leaf node must have the attribute
    named by `num_genes_attr` (by default `num_genes`) -- the number of gene
    samples from each population in the present.  Each edge on the tree
    should also have the attribute named by `pop_size_attr` (by default
    `pop_size`) -- the effective size of the population at this time. An
    edge without that attribute, the root edge included, is simulated with a
    population size of 1.

    If `gene_trees_block` is given, then the gene tree is added to the
    tree block, and the tree block's taxa block will be used to manage
    the gene tree's `taxa`. Otherwise a new `taxa.TaxaBlock` is created.

    `node_factory`, if given, is called with no arguments to create each
    gene node; otherwise `trees.Node()` is used.

    `tree_factory`, if given, supplies the gene tree through its
    `new_tree()` method; otherwise `trees.Tree()` is used.

    `rng` is the random number generator; `GLOBAL_RNG` is used if None.
    """

    # get our random number generator
    if rng is None:
        rng = GLOBAL_RNG  # use the global rng by default

    if gene_trees_block is not None:
        gtaxa = gene_trees_block.taxa_block
    else:
        gtaxa = taxa.TaxaBlock()

    # we create a set of gene nodes for each leaf node on the population
    # tree, and associate those gene nodes to the leaf by assignment
    # of 'taxon'.
    # NOTE: this decorates the leaves of the *input* tree, because the deep
    # copy below is only made afterwards.
    for leaf in pop_tree.leaf_iter():
        gene_nodes = []
        for gene_count in range(getattr(leaf, num_genes_attr)):
            if node_factory is not None:
                gene_node = node_factory()
            else:
                gene_node = trees.Node()
            gene_node.taxon = gtaxa.get_taxon(label=leaf.taxon.label + '_' +
                                              str(gene_count + 1))
            gene_nodes.append(gene_node)
        leaf.gene_nodes = gene_nodes

    # We iterate through the edges of the population tree in post-order,
    # i.e., visiting child edges before we visit parent edges. For
    # each edge visited, we take the genes found in the child nodes,
    # and run the coalescent simulation on them bounded by the length
    # of the edge. Any genes that have not yet coalesced at the end of
    # this period are added to the genes of the tail (parent) node of
    # the edge.

    # start with a new (deep) copy of the population tree so that the
    # internal-node decoration does not touch the original tree
    poptree_copy = copy.deepcopy(pop_tree)

    # start with a new tree
    if tree_factory is not None:
        gene_tree = tree_factory.new_tree()
    else:
        gene_tree = trees.Tree()
    for edge in poptree_copy.postorder_edge_iter():

        # Resolve the population size for *every* edge, the root included.
        # Previously the root branch read `pop_size` before assigning it, so
        # it silently reused the value of the last non-root edge visited, or
        # raised UnboundLocalError when the tree consisted of a root only.
        if hasattr(edge, pop_size_attr):
            pop_size = getattr(edge, pop_size_attr)
        else:
            # this means all our time will be in population units
            pop_size = 1

        head_genes = edge.head_node.gene_nodes

        # if mrca root, run unconstrained coalescent
        if edge.head_node.parent_node is None:
            if len(head_genes) > 1:
                final = coalescent.coalesce(nodes=head_genes,
                                            pop_size=pop_size,
                                            period=None,
                                            rng=rng)
            else:
                final = head_genes
            gene_tree.seed_node = final[0]
        else:
            uncoal = coalescent.coalesce(nodes=head_genes,
                                         pop_size=pop_size,
                                         period=edge.length,
                                         rng=rng)
            if not hasattr(edge.tail_node, 'gene_nodes'):
                edge.tail_node.gene_nodes = []
            edge.tail_node.gene_nodes.extend(uncoal)

    if gene_trees_block is not None:
        gene_trees_block.append(gene_tree)

    return gene_tree, poptree_copy