Esempio n. 1
0
    def test_serialization(self):
        """Check that trees survive a pickle and a JSON round trip.

        Random colored trees are serialized in both formats and reloaded.
        The preorder label sequences of the original tree, of a copy, and
        of both reloaded trees must be identical.
        """

        N, colors = 30, 5
        repeats = 20
        filenames = ('testfile_tree.pickle', 'testfile_tree.json')

        for _ in range(repeats):

            tree = random_colored_tree(N, colors)
            tree1 = tree.copy()

            try:
                for filename in filenames:
                    tree.serialize(filename)
                tree2, tree3 = [Tree.load(filename) for filename in filenames]
            finally:
                # always clean up, even if serializing or loading fails, so
                # that a failing run leaves no files in the working directory
                for filename in filenames:
                    if os.path.exists(filename):
                        os.remove(filename)

            tree_nodes = [v.label for v in tree.preorder()]
            tree1_nodes = [v.label for v in tree1.preorder()]
            tree2_nodes = [v.label for v in tree2.preorder()]
            tree3_nodes = [v.label for v in tree3.preorder()]

            self.assertListEqual(tree_nodes, tree1_nodes)
            self.assertListEqual(tree_nodes, tree2_nodes)
            self.assertListEqual(tree_nodes, tree3_nodes)
Esempio n. 2
0
def wrong_topology_matrix(OGT):
    """Return a wrong topology matrix by rearranging the edges of a binary tree.

    The edge lengths (`dist` of every non-root node) of `OGT` are shuffled
    and attached to a randomly grown binary tree. The distance matrix of
    that random tree with respect to a random leaf order is returned.

    Parameters
    ----------
    OGT : Tree
        The observable gene tree; expected to be binary and not planted.

    Returns
    -------
    The second item returned by `distance_matrix()` for the random tree,
    or None (after printing a message) if the number of non-root nodes of
    `OGT` is odd.
    """

    # do not include the root; the observable gene tree (OGT) is binary and
    # not planted, hence |E| should be even
    distances = [v.dist for v in OGT.preorder()][1:]
    if len(distances) % 2 != 0:
        print("List of distances is not even!")
        return None
    random.shuffle(distances)

    random_tree = Tree(TreeNode(label=0, dist=0.0))
    id_counter = 1
    current_leaves = [random_tree.root]

    while distances:
        # turn a random current leaf into an inner node with two children
        v = current_leaves.pop(random.randint(0, len(current_leaves) - 1))
        dist1, dist2 = distances.pop(), distances.pop()
        new_child1 = TreeNode(label=id_counter, dist=dist1)
        new_child2 = TreeNode(label=id_counter + 1, dist=dist2)
        v.add_child(new_child1)
        v.add_child(new_child2)
        current_leaves.extend(v.children)
        # two labels were consumed; advancing by one would assign the
        # label of `new_child2` again in the next iteration
        id_counter += 2

    # shuffling the leaves yields an implicit random bijection to the
    # leaves of the original tree
    random_leaves = list(random_tree.leaves())
    random.shuffle(random_leaves)

    _, D = distance_matrix(random_tree, leaf_order=random_leaves)
    return D
Esempio n. 3
0
def _yule(N, birth_rate):

    if birth_rate is None:
        birth_rate = 1.0
    elif birth_rate <= 0.0:
        raise ValueError("birth rate must be >0")

    tree = Tree(TreeNode(label=0, event='S', dist=0.0, tstamp=0.0))
    tree.number_of_species = N

    branches = [(1, tree.root)]
    forward_time = 0.0
    node_counter = 1

    while len(branches) < N:

        rate = len(branches) * birth_rate
        forward_time += np.random.exponential(1 / rate)

        i = np.random.randint(len(branches))
        branch_id, parent = branches[i]
        spec_node = TreeNode(label=branch_id,
                             event='S',
                             dist=forward_time - parent.tstamp,
                             tstamp=forward_time)
        parent.add_child(spec_node)
        branches[i] = (node_counter, spec_node)
        branches.append((node_counter + 1, spec_node))
        node_counter += 2

    # add length for pendant branches (cf. Hartmann et al. 2010)
    forward_time += np.random.exponential(1 / rate)

    # finalize the branches
    for branch_id, parent in branches:
        parent.add_child(
            TreeNode(label=branch_id,
                     event='S',
                     dist=forward_time - parent.tstamp,
                     tstamp=forward_time))

    _reverse_time_stamps(tree)

    return tree
def load(directory):
    """Load all pickled scenario trees from a directory.

    The number of files matching 'scenario*.pickle' determines how many
    trees are read; the files 'scenario0.pickle', 'scenario1.pickle', ...
    are then opened in index order and converted with `Tree.parse_nx()`.

    NOTE: unpickling executes arbitrary code contained in the files, so
    only use this on directories from a trusted source.

    Parameters
    ----------
    directory : str
        Path of the directory containing the scenario files.

    Returns
    -------
    list
        The parsed trees, ordered by scenario index.
    """

    number_of_scenarios = len(glob.glob(directory + '/scenario*.pickle'))

    def read_scenario(index):
        path = '{}/scenario{}.pickle'.format(directory, index)
        with open(path, 'rb') as f:
            return Tree.parse_nx(*pickle.load(f))

    return [read_scenario(index) for index in range(number_of_scenarios)]
Esempio n. 5
0
def _finalize(tree, G):
    
    if not tree:
        return None
    
    if isinstance(tree, TreeNode):
        tree = Tree(tree)
    
    # assign colors to the leaves
    reconstruct_color_from_graph(tree, G)
    
    return tree
Esempio n. 6
0
def _yule_age(age, birth_rate):

    if birth_rate is None:
        birth_rate = 1.0
    elif birth_rate <= 0.0:
        raise ValueError("birth rate must be >0")

    tree = Tree(TreeNode(label=0, event='S', dist=0.0, tstamp=0.0))

    branches = [(1, tree.root)]
    forward_time = 0.0
    node_counter = 1

    while forward_time < age:

        rate = len(branches) * birth_rate
        forward_time += np.random.exponential(1 / rate)

        # do not branch if age is already reached
        if forward_time >= age:
            break

        i = np.random.randint(len(branches))
        branch_id, parent = branches[i]
        spec_node = TreeNode(label=branch_id,
                             event='S',
                             dist=forward_time - parent.tstamp,
                             tstamp=forward_time)
        parent.add_child(spec_node)
        branches[i] = (node_counter, spec_node)
        branches.append((node_counter + 1, spec_node))
        node_counter += 2

    # finalize the branches
    for branch_id, parent in branches:
        parent.add_child(
            TreeNode(label=branch_id,
                     event='S',
                     dist=age - parent.tstamp,
                     tstamp=age))

    # reverse such that t(root) = age and t(leaves) = 0.0
    _reverse_time_stamps(tree)

    return tree
Esempio n. 7
0
def correct_bmg(bmg_original):
    """Build the LRT (using min cut in BUILD algorithm) and return its BMG.

    A tree is built separately for every weakly connected component of
    `bmg_original`. Components for which no (truthy) tree is obtained are
    skipped. Multiple trees are joined below a new common root.

    Returns None if no tree could be built for any component, otherwise
    the result of `bmg_from_tree()` for the (joined) tree.
    """

    components = nx.weakly_connected_components(bmg_original)
    candidates = (lrt_from_colored_graph(bmg_original.subgraph(component),
                                         mincut=True)
                  for component in components)
    subtrees = [candidate for candidate in candidates if candidate]

    if not subtrees:
        return None

    if len(subtrees) == 1:
        tree = subtrees[0]
    else:
        # join the trees of the components below a new common root
        tree = Tree(TreeNode())
        for subtree in subtrees:
            tree.root.add_child(subtree.root)

    return bmg_from_tree(tree)
def simulate():
    """Simulate sequence evolution along single edges and write alignments.

    For every repeat (`range(*repeats)`), every edge length in `distances`,
    every model in `subst_models` and every start length in `lengths`, a
    sequence is evolved along a two-node tree and the true alignment is
    written to a PHYLIP file in `directory`. All of these names are read
    from the enclosing module scope.
    """

    for i in range(*repeats):

        for d in distances:

            # tree consisting of a single edge of length d
            root = TreeNode(label=0, dist=0.0)
            root.add_child(TreeNode(label=1, dist=d))
            T = Tree(root)

            for subst_model in subst_models:

                evolver = Evolver(subst_model, jump_chain=False)

                for l in lengths:

                    outfile = "{}/{}_{}_{}_{}.phylip".format(
                        directory, subst_model.model_name, d, l, i)
                    print("Processing '{}'".format(outfile))

                    evolver.evolve_along_tree(T, start_length=l)

                    if subst_model.model_name not in ('WAG', 'JTT'):
                        evolver.true_alignment(include_inner=True,
                                               write_to=outfile)
                        continue

                    # treepuzzle accepts only >= 3 sequences, hence the
                    # first sequence is duplicated for two dummy nodes
                    alignment = evolver.true_alignment(include_inner=True)
                    for _, seq in alignment.items():
                        break
                    alignment[TreeNode(label=2)] = seq
                    alignment[TreeNode(label=3)] = seq

                    write_alignment(outfile, alignment,
                                    alignment_format='phylip')
Esempio n. 9
0
 def _initiatialize_tree(self):
     """Create the tree `T` with its root and the initial branches.

     The root of `T` is colored with the label of the root of `self.S` and
     shares its time stamp. One `_Branch` is created for every child of the
     root of `self.S` and registered in the bookkeeping structures of the
     instance.
     """

     S_root = self.S.root

     # more than one child: the root is a speciation event;
     # otherwise the species tree is planted and the root has no event
     root_event = 'S' if len(S_root.children) > 1 else None
     T = Tree(TreeNode(label=0, event=root_event,
                       color=S_root.label,
                       dist=0.0, tstamp=S_root.tstamp))

     self.id_counter += 1
     self.spec_queue.popleft()

     per_species = self._prohibit_extinction == 'per_species'
     rate = self.d + self.h if per_species else self.rate_sum

     for S_v in S_root.children:

         branch = _Branch(self.id_counter, len(self.branches), rate,
                          T.root, S_root, S_v, 0)
         self.ES_to_b[(S_root, S_v)].append(branch)
         self.branches.append(branch)
         self.id_counter += 1
         if self.S_subtree_survivors[S_v]:
             self.surv_non_loss_lineages.add(branch)

         self.total_rate += rate
         self.total_surviving = 1

     return T
Esempio n. 10
0
def parse_newick(newick):
    """Parse a tree in Newick format into an object of type 'Tree'.

    Parameters
    ----------
    newick : str
        A tree in Newick format. Everything from the first ';' on is
        ignored.

    Returns
    -------
    Tree
        The parsed tree.

    Raises
    ------
    TypeError
        If the input is not a string.
    ValueError
        If the input is not a valid Newick string (e.g. empty input or
        unbalanced parentheses).

    Notes
    -----
    Do not use this function for serialization and reloading Tree
    objects. Use the `serialize()` function instead.
    Labels and colors (the latter are given in <...> after the label) that
    consist of digits only are converted to int, all others remain strings.
    A string color containing '-' is converted into a tuple of its first
    two '-'-separated parts.
    Nodes without an explicit distance get the distance 1.0, and the
    distance of the root is always set to 0.0.
    If the string contains several top-level nodes, only the first one is
    used as the root.
    """

    if not isinstance(newick, str):
        raise TypeError("Newick parser needs a 'str' as input")

    end = newick.find(";")
    if end != -1:
        newick = newick[:end]
    # an empty string cannot describe a tree
    if not newick.strip():
        raise ValueError('invalid Newick string')

    # label<color>:distance
    label_col_dist_regex = re.compile(
        r"'?([a-zA-Z0-9_]*)'?<(.*)>:(-?[0-9]*\.?[0-9]*[Ee]?-?[0-9]+)")
    # label<color>
    label_col_regex = re.compile(r"'?([a-zA-Z0-9_]*)'?<(.*)>")
    # label:distance
    label_dist_regex = re.compile(
        r"'?([a-zA-Z0-9_]*)'?:(-?[0-9]*\.?[0-9]*[Ee]?-?[0-9]+)")

    def to_int(item):
        """Try to convert the string into int."""

        return int(item) if item.isdigit() else item

    def split_children(child_string):
        """Split a given string by all ',' that are not enclosed by parentheses."""

        stack = 0
        children = []
        current = ""
        for c in child_string:
            if (stack == 0) and c == ',':
                # strip every child (not only the last one) such that
                # subtrees preceded by whitespace are still recognized
                children.append(current.strip())
                current = ""
            elif c == '(':
                stack += 1
                current += c
            elif c == ')':
                if stack <= 0:
                    raise ValueError('invalid Newick string')
                stack -= 1
                current += c
            else:
                current += c
        children.append(current.strip())
        return children

    def parse_subtree(subroot, subtree_string):
        """Recursive function to parse the subtrees."""

        for child in split_children(subtree_string):
            node = TreeNode(event='')
            subroot.add_child(node)
            end = -1
            # 'startswith' is safe for empty children (unnamed leaves),
            # whereas indexing child[0] would raise an IndexError
            if child.startswith('('):  # the child has subtrees
                end = child.rfind(')')
                if end == -1:
                    raise ValueError('invalid Newick string')
                parse_subtree(node, child[1:end])
            # what remains after the subtree is the node's own description
            child = child[end + 1:].strip()
            label_col_dist = label_col_dist_regex.match(child)
            if label_col_dist:  # CASE 1: label<color>:distance
                node.label = to_int(label_col_dist.group(1))
                node.color = to_int(label_col_dist.group(2))
                node.dist = float(label_col_dist.group(3))
            else:
                label_col = label_col_regex.match(child)
                label_dist = label_dist_regex.match(child)
                if label_col:  # CASE 2: label<color>
                    node.label = to_int(label_col.group(1))
                    node.color = to_int(label_col.group(2))
                    node.dist = 1.0
                elif label_dist:  # CASE 3: label:distance
                    node.label = to_int(label_dist.group(1))
                    node.color = None
                    node.dist = float(label_dist.group(2))
                else:  # CASE 4: label
                    node.label = to_int(child)
                    node.color = None
                    node.dist = 1.0
            # color is a tuple
            if (node.color and isinstance(node.color, str)
                    and '-' in node.color):
                split_color = node.color.split('-')
                node.color = (to_int(split_color[0]), to_int(split_color[1]))

    temp_root = TreeNode(event='')
    parse_subtree(temp_root, newick)
    if temp_root.children:
        root = temp_root.children[0]
        root.dist = 0.0  # set distance of the root to 0
        root.detach()  # remove the parent temp_root
        # (important for non-recursive to_newick2)
        return Tree(root)
    else:
        raise ValueError('invalid Newick string')
Esempio n. 11
0
def random_colored_tree(N, colors, binary=False, force_all_colors=False):
    """Create a random colored tree.

    The number of leaves and the color labels are specified in the
    parameters `N` and `colors`, respectively. Each non-leaf node in the
    resulting tree will have at least two children (property of
    phylogenetic trees).

    Parameters
    ----------
    N : int
        The desired number of leaves.
    colors : int or iterable
        The colors (any iterable, e.g. a list, set or generator), or the
        desired number of colors in which case the colors
        {1, ..., colors} are used.
    binary : bool, optional
        If True, forces the tree to be binary (the default is False).
    force_all_colors : bool
        If True, the resulting tree is guaranteed to have at least one leaf
        of each color (the default is False).

    Returns
    -------
    Tree
        A random tree with `N` leaves and random leaf coloring.

    Raises
    ------
    TypeError
        If `N` is not an integer > 0, or if `colors` is neither an int nor
        iterable.
    ValueError
        If the number of colors is greater than `N` and `force_all_colors`
        is true.
    """

    # validate the colors first, so that no tree is built for invalid input
    if isinstance(colors, int):
        colors = list(range(1, colors + 1))
    elif isinstance(colors, collections.abc.Iterable):
        # materialize since len(), indexing and random.choice() are needed,
        # which sets and generators do not support
        colors = list(colors)
    else:
        raise TypeError("'colors' must be of type 'int' or iterable")

    if force_all_colors and len(colors) > N:
        raise ValueError('cannot force all colors since #colors > N')

    tree = Tree.random_tree(N, binary=binary)
    leaves = list(tree.leaves())

    if force_all_colors:
        # use every color at least once
        permutation = np.random.permutation(len(leaves))
        for i, leaf_index in enumerate(permutation):
            if i < len(colors):
                leaves[leaf_index].color = colors[i]
            else:
                # color the remaining leaves randomly
                leaves[leaf_index].color = random.choice(colors)
    else:
        # assign colors completely randomly
        for leaf in leaves:
            leaf.color = random.choice(colors)

    return tree
Esempio n. 12
0
def _EBDP_age_forward(age, episodes):
    """Episodic birth–death process (EBDP), forward algorithm with max. age.

    Parameters
    ----------
    age : float
        The time span of the simulation; surviving branches end at `age`.
    episodes : list
        One tuple per episode whose first two items are the birth and the
        death rate. The third and fourth item are passed on to
        `_EBDP_mass_extinction()`; the fourth item is moreover used as the
        point in time at which the episode starts.
        NOTE(review): the third item is presumably the survival
        probability of the mass extinction at the episode start -- confirm.

    Returns
    -------
    Tree
        The simulated tree including loss nodes (event 'L'), with time
        stamps reversed such that the root has time stamp `age`.
    """

    tree = Tree(TreeNode(label=0, event='S', dist=0.0, tstamp=0.0))

    # every entry is (id of the branch, node at which the branch starts)
    branches = [(1, tree.root)]
    forward_time = 0.0
    # labels 0 (root) and 1 (initial branch) are already in use; starting
    # at 1 would hand out label 1 a second time
    node_counter = 2
    i = 0  # current episode

    # may lead to extinction of the single branch at time t=0
    _EBDP_mass_extinction(branches, episodes[i][2], episodes[i][3])

    while forward_time < age:
        birth_rate, death_rate, *_ = episodes[i]

        # without any branch (or with zero rates) no further event can occur
        rate = len(branches) * (birth_rate + death_rate)
        waiting_time = np.random.exponential(
            1 / rate) if rate > 0.0 else float('inf')

        if i + 1 < len(episodes) and forward_time + waiting_time >= episodes[
                i + 1][3]:
            # the next episode starts before the drawn event; discard the
            # event and continue with the rates of the next episode
            _EBDP_mass_extinction(branches, episodes[i + 1][2],
                                  episodes[i + 1][3])
            forward_time = episodes[i + 1][3]
            i += 1

        elif forward_time + waiting_time >= age:
            break

        else:
            forward_time += waiting_time

            j = np.random.randint(len(branches))
            branch_id, parent = branches[j]

            if birth_rate > np.random.uniform(low=0.0,
                                              high=birth_rate + death_rate):
                # speciation event drawn
                spec_node = TreeNode(label=branch_id,
                                     event='S',
                                     dist=forward_time - parent.tstamp,
                                     tstamp=forward_time)
                parent.add_child(spec_node)
                branches[j] = (node_counter, spec_node)
                branches.append((node_counter + 1, spec_node))
                node_counter += 2
            else:
                # extinction event drawn
                loss_node = TreeNode(label=branch_id,
                                     event='L',
                                     dist=forward_time - parent.tstamp,
                                     tstamp=forward_time)
                parent.add_child(loss_node)
                branches.pop(j)

    # finalize the (surviving) branches
    for branch_id, parent in branches:
        parent.add_child(
            TreeNode(label=branch_id,
                     event='S',
                     dist=age - parent.tstamp,
                     tstamp=age))

    # reverse such that t(root) = age and t(surviving leaves) = 0.0
    for v in tree.preorder():
        v.tstamp = age - v.tstamp

    return tree
Esempio n. 13
0
def _EBDP_backward(N, episodes, max_tries=500):
    """Episodic birth–death process (EBDP), backward algorithm by Stadler 2001.

    NOTE(review): the cited year may be a typo for Stadler (2011) --
    confirm against the project's references.

    Starting from `N` leaves at time 0, the tree is built backwards in time
    by merging branches (speciation) and adding loss branches (extinction)
    until a single root remains. A finished tree is only accepted with a
    certain probability; otherwise the simulation is repeated.

    Parameters
    ----------
    N : int
        The number of leaves with event 'S' at time stamp 0.
    episodes : list
        One 4-tuple (birth rate, death rate, rho, t) per episode, where
        `t` is the point in time at which the episode starts.
        NOTE(review): rho is presumably the survival probability at the
        episode start -- confirm.
    max_tries : int, optional
        The maximal number of simulations (the default is 500).

    Returns
    -------
    Tree or None
        The first accepted tree, or None (after printing a message to
        stderr) if no tree was accepted within `max_tries` simulations.
    """

    # sum of the inverse birth rates of all episodes (for the acceptance
    # probability below)
    birth_inv_sum = sum([1 / episodes[i][0] for i in range(len(episodes))])

    for _ in range(max_tries):

        tree = None
        t = 0.0
        i = 0  # current episode

        # start with the N leaves at time 0
        branches = [
            TreeNode(label=j, event='S', dist=0.0, tstamp=t) for j in range(N)
        ]
        id_counter = N

        # one iteration per episode; ends once the root has been created
        while branches:
            birth_i, death_i, rho_i, t_i = episodes[i]

            # add loss branches at the start of the episode such that the
            # number of branches becomes round(len(branches) / rho_i)
            losses_to_add = round(len(branches) / rho_i) - len(branches)
            for j in range(losses_to_add):
                branches.append(
                    TreeNode(label=id_counter, event='L', dist=0.0, tstamp=t))
            id_counter += losses_to_add

            while branches:
                # waiting time until the next event
                w = np.random.exponential(
                    1 / ((birth_i + death_i) * len(branches)))

                if i + 1 < len(episodes) and t + w > episodes[i + 1][3]:
                    # the next episode starts before the drawn event;
                    # discard the event and switch to the next episode
                    t = episodes[i + 1][3]
                    i += 1
                    break

                else:
                    t += w

                    if birth_i > np.random.uniform(low=0.0,
                                                   high=birth_i + death_i):
                        # speciation event drawn
                        spec_node = TreeNode(label=id_counter,
                                             event='S',
                                             dist=0.0,
                                             tstamp=t)
                        id_counter += 1
                        if len(branches) > 1:
                            # merge two distinct random branches
                            k, l = np.random.choice(len(branches),
                                                    2,
                                                    replace=False)
                            # k < l ensures that removing index l does not
                            # shift the position k
                            if k > l:
                                k, l = l, k
                            spec_node.add_child(branches[k])
                            spec_node.add_child(branches[l])
                            branches[k] = spec_node
                            branches.pop(l)
                        else:
                            # only one branch left: the new node becomes the
                            # root, which ends the simulation
                            spec_node.add_child(branches[0])
                            tree = Tree(spec_node)
                            branches.clear()
                    else:
                        # extinction event drawn
                        branches.append(
                            TreeNode(label=id_counter,
                                     event='L',
                                     dist=0.0,
                                     tstamp=t))
                        id_counter += 1

        # return tree with the following probability
        # (birth_i is the birth rate of the episode in which the root
        # was created)
        if np.random.random() < (1 / birth_i) / birth_inv_sum:

            # derive the edge lengths from the time stamps
            for v in tree.preorder():
                if v.parent:
                    v.dist = v.parent.tstamp - v.tstamp

            return tree

    # no tree was accepted; None is returned implicitly
    print("Could not return a tree after {} simulations!".format(max_tries),
          file=sys.stderr)
Esempio n. 14
0
def simulate_species_tree(N,
                          model='innovation',
                          non_binary_prob=0.0,
                          planted=True,
                          remove_extinct=False,
                          rescale_to_height=None,
                          **kwargs):
    """Simulate a species tree S with N leaves.

    Parameters
    ----------
    N : int
        The number of leaves; for N == 0, `Tree(None)` is returned.
    model : str, optional
        Simulation model to be applied: 'innovation' (or 'innovations'),
        'yule', 'BDP' or 'EBDP', case-insensitive (the default is
        'innovation').
    non_binary_prob : float, optional
        Probability that an inner edge is contracted; results in a
        non-binary tree (the default is 0.0).
    planted : bool, optional
        Add a planted root that has the canonical root as its single
        neighbor (the default is True).
    remove_extinct : bool, optional
        Remove all branches that lead to extinctions, only relevant for
        the models 'BDP' and 'EBDP' (the default is False).
    rescale_to_height : int or float, optional
        Determines the final distance from the root to the (surviving)
        leaves (the default is None, i.e. model dependent).
    **kwargs
        Model parameters; 'birth_rate' is used for the model 'yule', and
        all of them are passed on for the models 'BDP' and 'EBDP'.

    Returns
    -------
    Tree
        The simulated species tree.

    Raises
    ------
    ValueError
        If a parameter has an invalid type or value, or if the model is
        not available.
    """

    # ---- parameter checking ----
    if not (isinstance(N, int) and N >= 0):
        raise ValueError('N must be an int >=0')
    if N == 0:
        return Tree(None)

    if not isinstance(model, str):
        raise ValueError("model must be of type 'str'")

    if non_binary_prob < 0.0 or non_binary_prob > 1.0:
        raise ValueError('contraction prob. must be in [0.0, 1.0]')

    if rescale_to_height is not None:
        is_number = isinstance(rescale_to_height, (int, float))
        if not is_number or rescale_to_height < 0.0:
            raise ValueError('height must be a number >=0')
        if N == 1 and not planted:
            raise ValueError('rescaling is not applicable to unplanted trees '
                             'with only one leaf')

    # ---- choice of the main simulation algorithm ----
    model_lower = model.lower()
    model_upper = model.upper()

    if model_lower in ('innovation', 'innovations'):
        tree = _innovation_model(N, planted)
    elif model_lower == 'yule':
        tree = _yule(N, kwargs.get('birth_rate'))
    elif model_upper == 'BDP':
        tree = _BDP(N, **kwargs)
    elif model_upper == 'EBDP':
        tree = _EBDP(N, **kwargs)
    else:
        raise ValueError("model '{}' is not available".format(model))

    # ---- post-processing ----
    # remove extinct branches for models that include losses
    if remove_extinct and model_upper in ('BDP', 'EBDP'):
        delete_losses_and_contract(tree, inplace=True)

    # remove planted edge for models that are planted by construction
    if not planted and model_upper in ('YULE', 'BDP', 'EBDP'):
        remove_planted_root(tree, inplace=True)

    # make tree non_binary by random contraction of edges
    if non_binary_prob > 0.0:
        tree.contract(
            _select_edges_for_contraction(tree,
                                          non_binary_prob,
                                          exclude_planted_edge=True))

    # rescale to specified height
    if rescale_to_height is not None:
        _rescale(tree, rescale_to_height, inplace=True)

    return tree
Esempio n. 15
0
def _innovation_model(N, planted, ultrametric=True):
    """Build a species tree S with N leaves with the innovation model.

    Species are represented by tuples of features. In every step, either a
    species gains a new feature (innovation) or loses one of its features
    (loss); both kinds of events split the affected species into two.

    Parameters
    ----------
    N : int
        The number of extant species at which the simulation stops.
    planted : bool
        If True, the root of the tree gets a single child below which the
        species evolve.
    ultrametric : bool, optional
        If True, `simulate_timing()` and `distance_from_timing()` are
        applied to the tree (the default is True).
        NOTE(review): according to the previous documentation this makes
        the tree ultrametric with height 1.0, whereas otherwise all edges
        have length 1.0 -- confirm against these helpers.

    Returns
    -------
    Tree
        The simulated species tree.
    """

    tree = Tree(TreeNode(label=0, event='S'))
    tree.number_of_species = N
    node_counter = 1

    # planted tree (root is an implicit outgroup with outdegree = 1)
    if planted:
        root = TreeNode(label=1, event='S')
        tree.root.add_child(root)
        node_counter += 1
    else:
        root = tree.root

    features = [0]  # set of available features
    species = {(0, ): root}  # extant species

    def speciate(old_species, new_species):
        """Split `old_species` into itself and `new_species`."""

        nonlocal node_counter

        parent = species[old_species]
        first = TreeNode(label=node_counter, event='S')
        parent.add_child(first)
        second = TreeNode(label=node_counter + 1, event='S')
        parent.add_child(second)
        node_counter += 2

        species[old_species] = first
        species[new_species] = second

    while len(species) < N:

        # species for which loss of a feature can trigger a speciation
        loss_candidates = {
            s for s in species
            if any(s[:i] + s[i + 1:] not in species for i in range(len(s)))
        }

        if loss_candidates:
            s = random.choice(list(loss_candidates))

            feature_index = random.randint(0, len(s) - 1) if len(s) > 1 else 0
            new_s = s[:feature_index] + s[feature_index + 1:]

            # LOSS EVENT (nothing happens if the species already exists)
            if new_s not in species:
                speciate(s, new_s)

        else:  # INNOVATION EVENT
            s = random.choice(list(species))
            new_feature = len(features)

            speciate(s, s + (new_feature, ))
            features.append(new_feature)

    if ultrametric:
        simulate_timing(tree)
        distance_from_timing(tree)

    return tree