Python TreeNode.TreeNode Beispiele, ete3.TreeNode.TreeNode Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: ASTree.py Projekt: BongoKnight/Semantic

    def c_toArbre(self):
        """Build the ete3 subtree that represents this command node.

        The returned node is named after ``self.value`` and always receives
        exactly two children, selected by the kind of command:

        * ``"="``: a leaf named ``"Id : <sons[0]>"`` followed by the
          expression subtree of ``sons[1]``.
        * ``";"``: the command subtrees of ``sons[0]`` and ``sons[1]``.
        * anything else: the expression subtree of ``sons[0]`` followed by
          the command subtree of ``sons[1]``.

        Returns:
            The TreeNode at the root of the generated subtree.
        """
        root = TreeNode()
        root.name = self.value

        if self.value == "=":
            # Assignment: the left-hand side is a plain identifier leaf.
            left = TreeNode()
            left.name = "Id : " + self.sons[0]
            right = self.sons[1].e_toArbre()
        elif self.value == ';':
            # Sequence: both sides are commands.
            left = self.sons[0].c_toArbre()
            right = self.sons[1].c_toArbre()
        else:
            # Any other command: an expression followed by a command body.
            left = self.sons[0].e_toArbre()
            right = self.sons[1].c_toArbre()

        root.add_child(left)
        root.add_child(right)
        return root

Beispiel #2

0

Datei anzeigen

Datei: utils.py Projekt: alexanderwhatley/Stanford-Project-Code

def recreate_tree(tree, num_layers=None, color=True):
    """Copy the topology of ``tree`` into fresh, uniformly styled nodes.

    Only the node names (and optionally the colors) are carried over; the
    coordinate and metadata labels of the source nodes are left behind.

    Args:
        tree: root of the tree to copy. Every copied node must expose
            ``name`` and ``children`` (and ``color`` when ``color`` is true).
        num_layers: stop once this many layers below the root have been
            copied. ``None`` copies the whole tree; because the layer counter
            is incremented before it is compared, values below 1 never match
            and the whole tree is copied as well.
        color: when true, use each source node's ``color`` as the ``fgcolor``
            of its copy.

    Returns:
        The root TreeNode of the copy.
    """
    def styled_copy(source):
        # Fresh node carrying only the name and the display style.
        clone = TreeNode(name=source.name)
        clone.img_style['size'] = 10
        if color:
            clone.img_style['fgcolor'] = source.color
        clone.img_style['shape'] = 'sphere'
        return clone

    new_tree = styled_copy(tree)
    #new_tree = TreeNodeHashable(name = tree.name)

    # Breadth-first walk keeping the source layer and its copies aligned.
    sources, copies = [tree], [new_tree]
    depth = 0
    while sources:
        next_sources, next_copies = [], []
        for source, copy in zip(sources, copies):
            for child in source.children:
                child_copy = styled_copy(child)
                copy.add_child(child_copy)
                next_sources.append(child)
                next_copies.append(child_copy)
        sources, copies = next_sources, next_copies
        depth += 1
        if num_layers is not None and depth == num_layers:
            break

    return new_tree

Beispiel #3

0

Datei anzeigen

def plot_marker_tree(tree, marker, resize_nodes=False, save=True):
    """Render a copy of ``tree`` whose nodes are colored by ``marker`` level.

    The supplementary expression table is loaded from disk, rows belonging
    to clusters annotated as 'dirt' are dropped, and the average marker
    value of every non-root node is obtained from ``get_node_markers``.
    The averages are min-max normalised to [0, 1] and mapped to a node color
    (and to the node size when ``resize_nodes`` is true).

    Args:
        tree: root of the tree to plot; nodes must expose ``name`` and
            ``children``.
        marker: name of the marker column in the supplementary table; also
            used as the plot title and in the output file name.
        resize_nodes: when true, node size grows with the normalised marker
            value (``1 + 10 * value``); otherwise every node has size 10.
        save: when true, write a PNG into 'Marker_Trees' (or
            'Marker_Trees_Same_Size') and return None; otherwise return the
            result of an inline render.

    Returns:
        None when ``save`` is true, else whatever ``render('%%inline')``
        returns.
    """
    supplementary_data = pd.read_csv('../Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
    supplementary_data.rename(columns={'X.X': 'X', 'Y.Y': 'Y', 'Z.Z': 'Z'}, inplace=True)
    supplementary_data['CD45_int'] = supplementary_data['CD45'].astype(int)
    # remove dirt from supplementary data
    supplementary_annotations = pd.read_excel('../Suppl.Table2.cluster annotations and cell counts.xlsx')
    dirt = supplementary_annotations.loc[
        supplementary_annotations['Imaging phenotype (cell type)'] == 'dirt',
        'X-shift cluster ID']
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a filtered view of the original table.
    supplementary_data = supplementary_data[
        ~supplementary_data['Imaging phenotype cluster ID'].isin(dirt)].copy()
    supplementary_data['sample'] = supplementary_data['sample_Xtile_Ytile'].apply(lambda x: x.split('_')[0])
    suppl_converted = convert_coordinates(supplementary_data)[['X', 'Y', 'Z', 'sample', marker]]

    new_tree = TreeNode(name=tree.name)
    new_tree.img_style['size'] = 1 if resize_nodes else 10
    new_tree.img_style['fgcolor'] = hls2hex(0, 0, 0)
    new_tree.img_style['shape'] = 'sphere'

    # Breadth-first copy of the topology; each copied node stores the raw
    # marker average of the node it was copied from.
    marker_avgs = []
    old_layer = [tree]
    new_layer = [new_tree]
    while old_layer:
        next_old_layer, next_new_layer = [], []
        for node, new_node in zip(old_layer, new_layer):
            for child in node.children:
                next_old_layer.append(child)
                new_child = TreeNode(name=child.name)
                marker_avg = get_node_markers(child, marker, suppl_converted)
                new_child.add_features(marker_avg=marker_avg)
                marker_avgs.append(marker_avg)
                new_node.add_child(new_child)
                next_new_layer.append(new_child)
        old_layer = next_old_layer
        new_layer = next_new_layer

    # A tree without descendants has nothing to normalise (np.min would
    # raise on the empty list).
    if marker_avgs:
        marker_min, marker_max = np.min(marker_avgs), np.max(marker_avgs)
        marker_range = marker_max - marker_min
        for node in new_tree.iter_descendants():
            # When every node has the same average the range is zero; map
            # all nodes to 0 instead of dividing by zero.
            if marker_range:
                norm_marker = (node.marker_avg - marker_min) / marker_range
            else:
                norm_marker = 0.0
            node.add_features(marker_avg=norm_marker)
            node.add_features(color=hls2hex(0, norm_marker, norm_marker * 0.5))
            node.img_style['size'] = 1 + 10 * norm_marker if resize_nodes else 10
            node.img_style['fgcolor'] = node.color
            node.img_style['shape'] = 'sphere'

    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.title.add_face(TextFace(marker, fsize=20), column=0)
    save_dir = 'Marker_Trees' if resize_nodes else 'Marker_Trees_Same_Size'

    if save:
        new_tree.render(save_dir + '/marker_tree_{}.png'.format(marker), tree_style=ts)
    else:
        return new_tree.render('%%inline', tree_style=ts)

Beispiel #4

0

Datei anzeigen

def addDeadLineage(spTree):
    """
    Attach a "dead" outgroup lineage to a copy of the species tree.

    Takes:
        - spTree (ete3.Tree) : species tree (left untouched, a deep copy is used)

    Returns:
        (ete3.Tree) : new root whose children are the copied species tree and
                      a dead lineage named "-1". Every node carries a "dead"
                      feature, which is True only for the new root and the
                      dead lineage.
    """
    speciesCopy = deepcopy(spTree)
    speciesCopy.dist = 0.1
    for node in speciesCopy.traverse():
        node.add_feature("dead", False)

    newRoot = TreeNode()
    newRoot.add_feature("dead", True)
    newRoot.dist = 0.0
    newRoot.add_child(speciesCopy)

    # Height of the new root, measured to the first leaf while the species
    # copy is still its only child.
    firstLeaf = newRoot.get_leaves()[0]
    rootHeight = newRoot.get_distance(firstLeaf)

    deadLineage = TreeNode()
    deadLineage.add_feature("dead", True)
    deadLineage.name = "-1"
    # The dead branch is as long as the root is high.
    deadLineage.dist = rootHeight
    newRoot.add_child(deadLineage)

    return newRoot

Beispiel #5

0

Datei anzeigen

def read_tree(infile, format, quiet=False):
    """Load a newick tree from ``infile`` or, when ``infile`` is '-', stdin.

    Args:
        infile: value handed to ``TreeNode`` as its ``newick`` argument, or
            '-' to use the first line of standard input instead.
        format: newick format passed through to ``TreeNode``.
        quiet: when false, the number of leaves is reported on stderr.

    Returns:
        The parsed TreeNode.
    """
    # Only the first line of stdin is used as the newick string.
    newick_source = sys.stdin.readlines()[0] if infile == '-' else infile
    tree = TreeNode(newick=newick_source, format=format, quoted_node_names=True)
    if quiet:
        return tree
    num_leaves = sum(1 for node in tree.traverse() if node.is_leaf())
    sys.stderr.write('number of leaves in input tree: {:,}\n'.format(num_leaves))
    return tree

Beispiel #6

0

Datei anzeigen

    def DFS_get_tree(root, par_node):
        """Recursively mirror the structure under ``root`` onto ``par_node``.

        ``get_info(root)`` supplies the node name as its first item. A
        three-item result ``(name, left, right)`` marks an internal node:
        two fresh children are attached to ``par_node`` and filled
        depth-first. Any other length (a one-item result marks a leaf)
        ends the recursion.

        Returns:
            None for a leaf, otherwise the pair of results of the two
            recursive calls.
        """
        info = get_info(root)
        par_node.name = info[0]
        if len(info) != 3:
            # Leaf (or unexpected shape): nothing more to attach.
            return None

        _, left, right = info
        left_node, right_node = TreeNode(), TreeNode()
        par_node.add_child(left_node)
        par_node.add_child(right_node)
        return DFS_get_tree(left, left_node), DFS_get_tree(right, right_node)

Beispiel #7

0

Datei anzeigen

 def compare(self, tree2, method='identity'):
     '''Compare this tree to the other tree.

     Args:
         tree2: object exposing the tree to compare against as
             ``tree2.tree``.
         method: one of

             * ``'identity'`` - compare the sorted lists of
               (sequence, frequency, parent sequence) of both trees.
             * ``'MRCA'`` - for every pair of taxa (sequences of nodes of
               this tree with a truthy frequency), the Hamming distance
               between the common-ancestor sequences found in the two
               trees, summed and divided by the summed ancestor lengths.
             * ``'RF'`` - first element of ete3's ``robinson_foulds``
               result, computed on copies in which every node with a
               positive frequency gets an extra child carrying its
               sequence.

     Returns:
         bool for ``'identity'``; a number for ``'MRCA'`` and ``'RF'``.

     Raises:
         ValueError: if ``method`` is not one of the values above.
         StopIteration: in ``'MRCA'`` mode, if a taxon sequence cannot be
             found in one of the trees.
     '''
     if method == 'identity':
         # we compare lists of seq, parent, abundance
         # return true if these lists are identical, else false
         def node_records(tree):
             return sorted((node.sequence, node.frequency,
                            node.up.sequence if node.up is not None else None)
                           for node in tree.traverse())
         return node_records(self.tree) == node_records(tree2.tree)
     elif method == 'MRCA':
         # scipy.zeros was only a deprecated alias of numpy.zeros.
         import numpy as np
         # matrix of hamming distance of common ancestors of taxa
         # takes a true and inferred tree as CollapsedTree objects
         taxa = [
             node.sequence for node in self.tree.traverse()
             if node.frequency
         ]
         n_taxa = len(taxa)
         # Resolve every taxon once per tree instead of once per pair. The
         # next() builtin works on Python 2.6+ and 3, unlike iterator.next().
         nodes_true = [next(self.tree.iter_search_nodes(sequence=seq))
                       for seq in taxa]
         nodes = [next(tree2.tree.iter_search_nodes(sequence=seq))
                  for seq in taxa]
         d = np.zeros(shape=(n_taxa, n_taxa))
         sum_sites = np.zeros(shape=(n_taxa, n_taxa))
         for i in range(n_taxa):
             for j in range(i + 1, n_taxa):
                 MRCA_true = self.tree.get_common_ancestor(
                     (nodes_true[i], nodes_true[j])).sequence
                 MRCA = tree2.tree.get_common_ancestor(
                     (nodes[i], nodes[j])).sequence
                 d[i, j] = hamming_distance(MRCA_true, MRCA)
                 sum_sites[i, j] = len(MRCA_true)
         return d.sum() / sum_sites.sum()
     elif method == 'RF':
         tree1_copy = self.tree.copy(method='deepcopy')
         tree2_copy = tree2.tree.copy(method='deepcopy')
         for treex in (tree1_copy, tree2_copy):
             # list(): children are added while walking the tree
             for node in list(treex.traverse()):
                 if node.frequency > 0:
                     child = TreeNode()
                     child.add_feature('sequence', node.sequence)
                     node.add_child(child)
         rf_kwargs = dict(attr_t1='sequence',
                          attr_t2='sequence',
                          unrooted_trees=True)
         try:
             return tree1_copy.robinson_foulds(tree2_copy, **rf_kwargs)[0]
         except Exception:
             # Best-effort retry that tolerates duplicated attribute values
             # (presumably the reason the first attempt fails - TODO confirm).
             # Narrowed from a bare except so KeyboardInterrupt/SystemExit
             # are no longer swallowed.
             return tree1_copy.robinson_foulds(tree2_copy,
                                               allow_dup=True,
                                               **rf_kwargs)[0]
     else:
         raise ValueError('invalid distance method: ' + method)

Beispiel #8

0

Datei anzeigen

Datei: tree.py Projekt: shinu89/pyearley

        def _build(parse_node, tree_node=None):
            """Convert a parse node into a TreeNode, recursing into children.

            Args:
                parse_node: a ``LeafNode`` or ``InternalNode``; any other
                    value leaves the tree node unnamed and without features
                    (lists are additionally printed).
                tree_node: node to fill in; a new TreeNode is created when
                    omitted.

            Returns:
                The filled TreeNode. Leaves carry ``tokens`` (a one-item
                list); internal nodes carry ``rule`` and the concatenated
                ``tokens`` of their children.
            """
            if tree_node is None:
                tree_node = TreeNode()

            if isinstance(parse_node, list):
                print(parse_node)

            if isinstance(parse_node, LeafNode):
                symbol, leaf_token = parse_node.literal, parse_node.token
                tree_node.name = symbol
                tree_node.add_feature("tokens", [leaf_token])
            elif isinstance(parse_node, InternalNode):
                symbol, rule = parse_node.symbol, parse_node.rule
                collected = []
                for sub_parse in parse_node.children:
                    sub_tree = _build(sub_parse)
                    tree_node.add_child(sub_tree)
                    # Tokens bubble up so each node spans its whole subtree.
                    collected.extend(sub_tree.tokens)

                tree_node.name = symbol
                tree_node.add_feature("rule", rule)
                tree_node.add_feature("tokens", collected)

            return tree_node

Beispiel #9

0

Datei anzeigen

def _convert_biotree_to_etetree(bio_tree):
    """Convert ``bio_tree`` to an ete TreeNode through its newick text.

    The tree is serialised with ``write_newick`` into an in-memory buffer;
    labels of the form ``Inner<digits>`` that directly precede a ``:`` are
    removed before the text is handed to ``TreeNode``.
    """
    buffer = io.StringIO()
    write_newick([bio_tree], buffer)
    stripped_newick = re.sub("Inner[0-9]+:", ":", buffer.getvalue())
    return TreeNode(stripped_newick)

Beispiel #10

0

Datei anzeigen

Datei: utils.py Projekt: alexanderwhatley/Stanford-Project-Code

def add_tree_layer(tree, leaves, clusters, proportions, child_coords,
                   prop_filter):
    '''
    Create one node per child cluster and hang it under its dominant parent.

    tree: tree that we want to add an additional layer to (returned as is)
    leaves: leaves of tree, indexed by parent cluster id
    clusters: indexable collection of the cluster ids of the child layer
    proportions: nested dictionary keyed by child id, then by parent id,
                 holding the proportion of cells contained in the parent
                 that are also contained in the child
    child_coords: coordinate data of each child, indexed by child cluster id
    prop_filter: proportion that the dominant parent must exceed for the
                 edge between the clusters to be created

    Returns the tree together with a dictionary of all created child nodes
    (attached or not), keyed by cluster id.
    '''
    child_nodes = {}
    for position in range(len(clusters)):
        cluster_id = clusters[position]
        node = TreeNode(name=cluster_id)
        # coordinate data and cluster id travel with the node
        node.add_features(coords=child_coords[cluster_id],
                          cluster_id=cluster_id)
        child_nodes[cluster_id] = node

    for child_id in proportions:
        parent_shares = proportions[child_id]
        # Only the parent with the largest share is considered, so a child
        # is never attached to more than one parent.
        dominant_parent = max(parent_shares, key=parent_shares.get)
        if parent_shares[dominant_parent] > prop_filter:
            leaves[dominant_parent].add_child(child_nodes[child_id])

    return tree, child_nodes

Beispiel #11

0

Datei anzeigen

Datei: ASTree.py Projekt: BongoKnight/Semantic

    def p_toArbre(self):
        """Build the ete3 tree for the whole program.

        The root is named ``"main()"`` and gets three children, in order:
        a leaf named ``str(sons[0])``, the command subtree of ``sons[1]``
        and the expression subtree of ``sons[2]``.

        Returns:
            The root TreeNode.
        """
        root = TreeNode()
        root.name = "main()"

        header = TreeNode()
        header.name = str(self.sons[0])

        body = self.sons[1].c_toArbre()
        result = self.sons[2].e_toArbre()

        for branch in (header, body, result):
            root.add_child(branch)

        return root

Beispiel #12

0

Datei anzeigen

Datei: ASTree.py Projekt: BongoKnight/Semantic

    def e_toArbre(self):
        """Build the ete3 subtree that represents this expression node.

        * ``NUMBER``: leaf named ``"Number : <value>"``.
        * ``ID``: leaf named ``"Id : <value>"``.
        * ``OPBIN``: node named after the operator with the expression
          subtrees of ``sons[0]`` and ``sons[1]`` as children.

        Returns:
            The TreeNode, or None for any other node type.
        """
        kind = self.type
        if kind == "NUMBER":
            label = "Number : " + str(self.value)
        elif kind == "ID":
            label = "Id : " + self.value
        elif kind == "OPBIN":
            label = self.value
        else:
            # Unknown expression types produce no node.
            return None

        node = TreeNode()
        node.name = label
        if kind == "OPBIN":
            left = self.sons[0].e_toArbre()
            right = self.sons[1].e_toArbre()
            node.add_child(left)
            node.add_child(right)
        return node

Beispiel #13

0

Datei anzeigen

 def simplify_tree(self, tree):
     """Simplify ``tree`` in place, recursing into its children.

     ``_simplify_tree`` is applied to the node first. A node labelled
     'Arg1', 'Arg2', 'Conn' or 'none' then has its children replaced by the
     result of ``get_leave_node``. For any other node, each child for which
     ``deeperthan1`` is true is simplified recursively, and every other
     child is wrapped in a new TreeNode that carries the child's label.
     """
     self._simplify_tree(tree)
     if tree.label in ('Arg1', 'Arg2', 'Conn', 'none'):
         tree.children = self.get_leave_node(tree)
         return
     for position, child in enumerate(tree.children):
         if not self.deeperthan1(child):
             # Shallow child: insert a wrapper node with the same label.
             wrapper = TreeNode()
             wrapper.children = [child]
             wrapper.label = child.label
             tree.children[position] = wrapper
             continue
         self.simplify_tree(child)

Beispiel #14

0

Datei anzeigen

Datei: test_simple_functions.py Projekt: willboulton/phastSim

    def parameterised_test(mutDict, insertionDict, mutations, expected_output):
        """Check one ``writeGenomeShortIndels`` scenario.

        A node carrying ``mutations`` is written to a mock file. The call
        must leave ``mutDict`` and ``insertionDict`` equal to what they were
        beforehand and must produce ``expected_output`` as written data.
        """
        sink = MockFile()
        node = TreeNode(name="test_node")
        node.mutations = mutations

        # Shallow snapshots taken before the call under test.
        mut_before, insertions_before = mutDict.copy(), insertionDict.copy()

        genome_tree.writeGenomeShortIndels(
            node=node,
            file=sink,
            mutDict=mutDict,
            insertionDict=insertionDict,
        )

        # the genome tree applies the mutations and then reverts them, so
        # both dictionaries have to be unchanged once printing is done.
        assert mutDict == mut_before
        assert insertionDict == insertions_before
        assert sink.written_data == expected_output

Beispiel #15

0

Datei anzeigen

 def initialize_pathogen_tree(self):
     """
     Start one pathogen lineage per host tip.

     Each lineage is a TreeNode named '<host tip name>_P' with dist 0 that
     records the height and the host tip it was started from. Lineages at
     height 0 go to ``extant_p``; all others wait in ``not_yet_sampled_p``.
     TODO: relax the one-lineage-per-tip assumption - needs some way to input
     """
     # reset containers
     self.extant_p = []  # pathogen lineages that have not coalesced
     self.not_yet_sampled_p = []  # pathogen lineages higher in the tree
     for host_tip in self.hosttree.get_leaves():
         pnode = TreeNode(name=host_tip.name + '_P', dist=0)
         pnode.add_features(height=host_tip.height, host=host_tip)
         bucket = (self.extant_p if host_tip.height == 0
                   else self.not_yet_sampled_p)
         bucket.append(pnode)

Beispiel #16

0

Datei anzeigen

def copy_forest(forest, features=None):
    """Return structural copies of every tree in ``forest``.

    Each copied node receives the ``dist``, ``support`` and ``name`` of its
    source node plus every requested feature the source node actually has.

    Args:
        forest: sequence of tree roots.
        features: names of the features to copy. When empty or None, the
            features of the first tree's root are used.

    Returns:
        A list with one copied root per input tree; an empty list for an
        empty forest.
    """
    # Nothing to copy; also avoids indexing forest[0] for the default
    # feature set below.
    if len(forest) == 0:
        return []

    features = set(features if features else forest[0].features)
    copied_forest = []
    for tree in forest:
        copied_tree = TreeNode()
        copied_forest.append(copied_tree)
        # Explicit stack of (source node, copy) pairs instead of recursion.
        todo = [(tree, copied_tree)]
        while todo:
            n, copied_n = todo.pop()
            copied_n.dist = n.dist
            copied_n.support = n.support
            copied_n.name = n.name
            for f in features:
                if hasattr(n, f):
                    copied_n.add_feature(f, getattr(n, f))
            # add_child() without arguments creates the new child that is
            # then filled in when its pair is popped.
            for c in n.children:
                todo.append((c, copied_n.add_child()))
    return copied_forest

Beispiel #17

0

Datei anzeigen

Datei: tree.py Projekt: DulanjanaYasara/chatbot

    def creation_by_words(self, words):
        """
        Creation of a tree based on separate words in the word list.

        Every entry is split into parts on whitespace/hyphen runs and on
        '://' style separators (reversed when ``self.is_reversed`` is set).
        The longest leading run of parts already registered in
        ``self.name2node`` is reused as the attachment point; the remaining
        parts are added below it as a chain of new nodes, each registered in
        ``self.name2node`` under its space-joined full name.

        :type words: list
        :return: the newly created tree root (entries without a registered
                 prefix are attached directly to it)
        """
        # Creates an empty tree
        tree = Tree()
        tree.name = ""
        # Populate tree; set() makes sure there are no duplicates
        for word in set(words):
            # If no similar words exist, add it to the base of tree
            target = tree

            # A separate name keeps the parts of this entry from shadowing
            # the collection being iterated.
            parts = split(r'[\s-]+|:[\\/]{2}', word)
            if self.is_reversed:
                parts = list(reversed(parts))

            # Find relatives in the tree: try the longest prefix first.
            # range() replaces xrange(), which does not exist on Python 3.
            root = ''
            pos = 0
            for pos in range(len(parts), -1, -1):
                root = ' '.join(parts[:pos])
                if root in self.name2node:
                    target = self.name2node[root]
                    break

            # Add new nodes as necessary
            fullname = root
            for wd in parts[pos:]:
                fullname = (fullname + ' ' + wd).strip()
                new_node = TreeNode(name=wd.strip(), dist=target.dist + 1)
                target.add_child(new_node)
                self.name2node[fullname] = new_node
                target = new_node

        return tree

Beispiel #18

0

Datei anzeigen

    def add_tree_to_distribution(self, tree):
        """
        Add the bipartitions of a tree to the CCP distribution.

        A root with three children is first made bifurcating by grouping its
        second and third child under a new intermediate node (the tree is
        modified in place). If any node still has more than two children the
        program prints a message and exits with status 1.

        Takes:
            - tree (ete3.Tree): phylogenetic tree

        """

        if len(tree.children) == 3:
            ## special unrooted case where the tree begins with a trifurcation ...
            ## we artificially remove the trifurcation to avoid future problems
            a = TreeNode()
            b = tree.children[1]
            c = tree.children[2]
            b.detach()
            c.detach()
            tree.add_child(a)
            a.add_child(b)
            a.add_child(c)

        for i in tree.traverse():
            if len(i.children) > 2:
                # Parenthesised single-argument print works on Python 2 and 3.
                print("multifurcation detected! Please provide bifurcating trees.")
                print("exiting now")
                # Same effect as exit(1) without relying on the site module.
                raise SystemExit(1)

        if self.nb_observation == 0:  ##no tree has been observed yet: add all the leaves
            for leaf_name in tree.get_leaf_names():
                self.get_leaf_id(leaf_name)  ##adds the leaves to the CCP

        for node in tree.traverse("postorder"):  ##for each branch of the tree
            self.add_tree_branch_to_distribution(node)

        self.nb_observation += 1

Beispiel #19

0

Datei anzeigen

Datei: gctree_tools.py Projekt: m-vieira/bcr-phylo-benchmark

    def simulate(self):
        '''
        Simulate a collapsed tree from ``self.params``.

        The existing ``tree`` data member is replaced by the simulation
        result and ``self`` is returned. Raises ValueError when ``params``
        is not defined.
        '''
        if self.params is None:
            raise ValueError('params must be defined for simulation')

        # A LeavesAndClades simulation provides the number of clones
        # (self.c) and mutants (self.m) of the root node of the collapsed
        # tree.
        LeavesAndClades.simulate(self)
        self.tree = TreeNode()
        self.tree.add_feature('frequency', self.c)

        if self.m != 0:
            for _ in range(self.m):
                # every mutant clade is itself a simulated collapsed tree
                subtree = CollapsedTree(params=self.params,
                                        frame=self.frame).simulate().tree
                subtree.dist = 1
                self.tree.add_child(subtree)

        return self

Beispiel #20

0

Datei anzeigen

    def coalesce_paths(self, child_paths, t0):
        """
        Merge two extant pathogen lineages into a new ancestral lineage.

        The new lineage becomes the parent of both children, inherits their
        host, is stored at height ``t0`` and is appended to ``extant_p``.
        Both children get their branch length set to ``t0 - height`` and are
        moved from ``extant_p`` to ``not_extant_p``.

        :param child_paths:  Two TreeNodes in the pathogen tree; both must be
            extant, share the same host and lie below ``t0``.
        :param t0:  Time of pathogen coalescence as height
        :return:  TreeNode object for the new pathogen lineage.
        """
        assert len(child_paths) == 2, 'Can only coalesce 2 pathogen lineages at a time'
        first, second = child_paths

        assert first in self.extant_p and second in self.extant_p, 'Both pathogen lineages must be extant'
        assert first.host == second.host, 'Can only coalesce pathogen lineages in the same host'
        assert first.height < t0 and second.height < t0, \
            'Pathogen lineage heights %f %f cannot exceed coalescent event %f' % (first.height, second.height, t0)

        # create new pathogen lineage
        merged_name = '_'.join([x.name for x in child_paths])
        new_path = TreeNode(name=merged_name, dist=0)
        new_path.add_features(host=first.host, height=t0)

        # ete3.Tree.children has to be a list
        new_path.children = list(child_paths)
        self.extant_p.append(new_path)

        # coalesced pathogen lineages are no longer extant
        for lineage in child_paths:
            lineage.up = new_path
            # the height was stored when the lineage was created
            lineage.dist = t0 - lineage.height
            self.extant_p.remove(lineage)
            self.not_extant_p.append(lineage)

        return new_path

Beispiel #21

0

Datei anzeigen

Datei: phylogeny.py Projekt: CovertLab/wcecoli-colony-analysis

def make_ete_trees(agent_ids: Iterable[str]) -> List[TreeNode]:
    '''Construct an ETE Toolkit Tree from a sequence of agent IDs

    Agent IDs must be constructed such that for any agent with ID
    :math:`c` with a parent with ID :math:`p`, :math:`p == c[:-1]`. This
    function should be able to handle multiple phylogenies among the
    agents, but this behavior is not guaranteed, tested, nor supported.

    Args:
        agent_ids: Agent IDs to build a tree from. Any iterable is
            accepted, including one-shot iterators.

    Returns:
        A list of the roots of the created trees.

    Raises:
        ValueError: If the part of an ID after the common prefix of all
            IDs is not numeric.
    '''
    # Materialize the IDs once: the input may be a one-shot iterator, which
    # would be exhausted by the prefix computation before it is sorted.
    sorted_agents = sorted(agent_ids)
    stem = os.path.commonprefix(sorted_agents)
    id_node_map: Dict[str, TreeNode] = dict()
    roots: List[TreeNode] = []
    # Sorted order makes every parent ID come before the IDs it prefixes.
    for agent_id in sorted_agents:
        phylogeny_id = agent_id[len(stem):]
        try:
            if phylogeny_id:
                int(phylogeny_id)
        except ValueError as e:
            raise ValueError(
                'String in ID {} after stem {} is non-numeric'.format(
                    agent_id, stem)) from e
        parent_phylo_id = phylogeny_id[:-1]
        if parent_phylo_id in id_node_map:
            parent = id_node_map[parent_phylo_id]
            child = parent.add_child(name=agent_id)
        else:
            # No known parent: this agent starts a new tree.
            child = TreeNode(name=agent_id)
            roots.append(child)
        id_node_map[phylogeny_id] = child
    return roots

Beispiel #22

0

Datei anzeigen

    def get_tree_from_CCP(self, method, bip=None, node=None):
        """
        RECURSIVE
        build a tree from the CCP distribution

        Takes:
            - method (function) : function that takes a bipartition id and returns a tuple of children bipartition ids
            - bip (int): id of the bipartition represented by node (not read when node is None)
            - node (ete3.TreeNode): node to expand; None to start a new tree

        Returns:
            (ete3.TreeNode): phylogenetic tree drawn from the CCP distribution
        """

        if node is None:  ##nothing is created yet
            root_bip = None  ##bip that contains all leaves but one
            leaf_bip = None  ##bip that contains only 1 leaf

            all_but_one = self.number_of_leaves() - 1
            # A separate loop name keeps the bip parameter from being
            # overwritten by the search.
            for candidate in self.dbip_set:
                if len(self.dbip_set[candidate]) == all_but_one:  ##all leaves but one type of clade
                    root_bip = candidate

                    ##complementary leaf set. Only one leaf
                    one_leaf_set = set(self.dleaf_id) - self.dbip_set[candidate]

                    leaf_bip = self.get_bip_from_leafset(one_leaf_set)

                    break

            leaf_bip_count = self.dbip_count[leaf_bip]
            leaf_bip_blen = self.dbip_bls[leaf_bip] / leaf_bip_count

            ##as this is the root, this length will be divided between both of the root children.
            DIP = [leaf_bip, root_bip]
            BLEN = [leaf_bip_blen / 2., leaf_bip_blen / 2.]

            ##... creating the root node
            node = Tree()

        elif len(self.dbip_set[bip]) == 1:  ##the current bip is a leaf

            leaf_id = next(iter(self.dbip_set[bip]))
            node.name = self.dleaf_id[leaf_id]

            return node

        else:  ##bipartition node that is not the root: draw bipartition using the method function

            DIP = method(bip)  ##choosing a split of the clade

            ##average observed branch length of each child clade
            BLEN = [self.dbip_bls[d] * 1. / self.dbip_count[d] for d in DIP]

        ##for each new clade in dip, we create a child node

        for d, blen in zip(DIP, BLEN):
            new = TreeNode(dist=blen)
            node.add_child(new)

            self.get_tree_from_CCP(method, d, new)  #RECURSION

        return node

Beispiel #23

0

Datei anzeigen

Datei: miseq_pacbio_tree.py Projekt: HIVDiversity/NGS_analysis_pipeline

def bub_tree(tree, fasta, outfile1, root, types, c_dict, show, size, colours,
             field1, field2, scale, multiplier, dna):
    """
    Style an ete tree leaf by leaf, add a colour legend and render it.

    Leaf names are split on "_"; the resulting fields provide the time point
    (used to pick the leaf colour) and, optionally, a frequency (used to
    scale the leaf). When a fasta mapping is given, each leaf also gets its
    sequence drawn in the aligned column.

    :param tree: tree object from ete
    :param fasta: the fasta data used to make the tree, indexed by leaf name
                  to obtain each sequence; None to skip the sequence faces
    :param outfile1: output path passed to tree.render
    :param root: sequence name to use as root (outgroup); None to keep the
                 current rooting
    :param types: tree type: circular (c) or rectangle (r)
    :param c_dict: dictionary mapping time point to colour (from col_map)
    :param show: show the tree in a gui (only when exactly True)
    :param size: scale the terminal nodes by frequency information (only
                 when exactly True)
    :param colours: when equal to 3, the sequence colour dictionaries are
                    replaced by None; otherwise the built-in DNA/amino-acid
                    colour maps are used
    :param field1: index of the name field that contains the size/frequency value
    :param field2: index of the name field that contains the time point
    :param scale: how much to scale the x axis
    :param multiplier: factor applied to the frequency when sizing a leaf
                       (500 when None)
    :param dna: true/false, is sequence a DNA sequence?
    :return: None, renders an image of the tree to outfile1
    """

    if multiplier is None:
        mult = 500
    else:
        mult = multiplier

    # Background/foreground colours per residue for the sequence faces.
    if dna:
        dna_prot = 'dna'
        bg_c = {
            'A': 'green',
            'C': 'blue',
            'G': 'black',
            'T': 'red',
            '-': 'grey',
            'X': 'white'
        }

        fg_c = {
            'A': 'black',
            'C': 'black',
            'G': 'black',
            'T': 'black',
            '-': 'black',
            'X': 'white'
        }
    else:
        dna_prot = 'aa'
        bg_c = {
            'K': '#145AFF',
            'R': '#145AFF',
            'H': '#8282D2',
            'E': '#E60A0A',
            'D': '#E60A0A',
            'N': '#00DCDC',
            'Q': '#00DCDC',
            'S': '#FA9600',
            'T': '#FA9600',
            'L': '#0F820F',
            'I': '#0F820F',
            'V': '#0F820F',
            'Y': '#3232AA',
            'F': '#3232AA',
            'W': '#B45AB4',
            'C': '#E6E600',
            'M': '#E6E600',
            'A': '#C8C8C8',
            'G': '#EBEBEB',
            'P': '#DC9682',
            '-': 'grey',
            'X': 'white'
        }

        fg_c = {
            'K': 'black',
            'R': 'black',
            'H': 'black',
            'E': 'black',
            'D': 'black',
            'N': 'black',
            'Q': 'black',
            'S': 'black',
            'T': 'black',
            'L': 'black',
            'I': 'black',
            'V': 'black',
            'Y': 'black',
            'F': 'black',
            'W': 'black',
            'C': 'black',
            'M': 'black',
            'A': 'black',
            'G': 'black',
            'P': 'black',
            '-': 'grey',
            'X': 'white'
        }

    # colours == 3 discards the maps above and hands None to SequenceFace.
    if colours == 3:
        bg_c = None
        fg_c = None

    # outfile3 = str(outfile1.replace(".svg", ".nwk"))

    tstyle = TreeStyle()
    tstyle.force_topology = False
    tstyle.mode = types
    tstyle.scale = scale
    tstyle.min_leaf_separation = 0
    tstyle.optimal_scale_level = 'full'  # 'mid'
    # tstyle.complete_branch_lines_when_necessary = False
    if types == 'c':
        tstyle.root_opening_factor = 0.25

    tstyle.draw_guiding_lines = False
    tstyle.guiding_lines_color = 'slateblue'
    tstyle.show_leaf_name = False
    tstyle.allow_face_overlap = True
    tstyle.show_branch_length = False
    tstyle.show_branch_support = False
    # NOTE(review): this constructs a TreeNode and discards it immediately;
    # it is never attached to `tree`, so it looks like a leftover.
    TreeNode(format=0, support=True)
    # tnode = TreeNode()

    if root is not None:
        tree.set_outgroup(root)
    # else:
    #     r = tnode.get_midpoint_outgroup()
    #     print("r", r)
    #     tree.set_outgroup(r)
    # (time point, colour) pairs seen on the leaves; source of the legend.
    time_col = []
    for node in tree.traverse():
        # node.ladderize()
        if node.is_leaf() is True:
            try:
                name = node.name.split("_")
                time = name[field2]
                # `kind` is never read, but the lookup still raises for
                # names with fewer than four fields and so sends them to
                # the fallback below.
                kind = name[3]
                # print(name)
            except:
                # Fallback: `name` becomes the raw string here, so the
                # name[field1] lookup below indexes a single character.
                time = 'zero'
                name = node.name
                print("Incorrect name format for ", node.name)

            if size is True:
                try:
                    s = 20 + float(name[field1]) * mult
                except:
                    s = 20
                    print("No frequency information for ", node.name)
            else:
                s = 20

            # Raises KeyError when the time point (including the 'zero'
            # fallback) has no entry in c_dict.
            colour = c_dict[time]
            time_col.append((time, colour))
            nstyle = NodeStyle()
            nstyle["fgcolor"] = colour
            nstyle["size"] = s
            nstyle["hz_line_width"] = 10
            nstyle["vt_line_width"] = 10
            nstyle["hz_line_color"] = colour
            nstyle["vt_line_color"] = 'black'
            nstyle["hz_line_type"] = 0
            nstyle["vt_line_type"] = 0
            node.set_style(nstyle)

            if root is not None and node.name == root:  # place holder in case you want to do something with the root leaf
                print('root is ', node.name)
                # nstyle["shape"] = "square"
                # nstyle["fgcolor"] = "black"
                # nstyle["size"] = s
                # nstyle["shape"] = "circle"
                # node.set_style(nstyle)

            else:
                nstyle["shape"] = "circle"
                node.set_style(nstyle)

            if fasta is not None:
                seq = fasta[str(node.name)]
                seqFace = SequenceFace(seq,
                                       seqtype=dna_prot,
                                       fsize=10,
                                       fg_colors=fg_c,
                                       bg_colors=bg_c,
                                       codon=None,
                                       col_w=40,
                                       alt_col_w=3,
                                       special_col=None,
                                       interactive=True)
                # seqFace = SeqMotifFace(seq=seq, motifs=None, seqtype=dna_prot, gap_format=' ', seq_format='()', scale_factor=20,
                #              height=20, width=50, fgcolor='white', bgcolor='grey', gapcolor='white', )
                # seqFace = SeqMotifFace(seq, seq_format="seq", fgcolor=fg_c, bgcolor=bg_c) #interactive=True

                # The leaf is looked up again by name and the sequence face
                # is placed in the aligned column.
                (tree & node.name).add_face(seqFace, 0, "aligned")

        else:
            # Internal nodes: a minimal marker with the same line widths.
            nstyle = NodeStyle()
            nstyle["size"] = 0.1
            nstyle["hz_line_width"] = 10
            nstyle["vt_line_width"] = 10
            node.set_style(nstyle)
            continue
    tree.ladderize()
    # tnode.ladderize()
    # One legend entry per distinct (time point, colour) pair, sorted.
    legendkey = sorted(set(time_col))
    legendkey = [(tp, col) for tp, col in legendkey]
    # legendkey.insert(0, ('Root', 'black'))
    # Trailing entry with an empty label and a white circle.
    legendkey.append(('', 'white'))

    for tm, clr in legendkey:
        tstyle.legend.add_face(faces.CircleFace(30, clr), column=0)
        tstyle.legend.add_face(faces.TextFace('\t' + tm,
                                              ftype='Arial',
                                              fsize=60,
                                              fgcolor='black',
                                              tight_text=True),
                               column=1)
    if show is True:
        tree.show(tree_style=tstyle)

    tree.render(outfile1, dpi=600, tree_style=tstyle)

Beispiel #24

0

Datei anzeigen

Datei: simulator.py Projekt: m-vieira/bcr-phylo-benchmark

    def simulate(self,
                 sequence,
                 pair_bounds=None,
                 lambda_=0.9,
                 lambda0=[1],
                 N=None,
                 T=None,
                 n=None,
                 verbose=False,
                 selection_params=None):
        '''
        Simulate a Poisson branching process with mutations introduced
        by the chosen mutation model, e.g. motif or uniform.

        Can either simulate under a neutral model without selection,
        or using an affinity maturation inspired model for selection.

        Args:
            sequence: Sequence stored on the root node of the tree.
            pair_bounds: Optional pair of (start, end) bounds. When given,
                the two slices of each leaf sequence are mutated separately
                with lambda0[0] and lambda0[1] and then concatenated.
            lambda_: Mean of the default Poisson progeny distribution.
            lambda0: Indexable of mutation parameters forwarded to
                self.mutate. The default list is shared between calls but
                is only read here, never modified.
            N: The loop keeps running while the number of unterminated
                leaves is below N. Mutually exclusive with T.
            T: List of sampling times; the loop runs until max(T). Times
                other than the largest trigger intermediate sampling.
            n: List of sample sizes. A single value is broadcast to every
                entry of T when T has more than one time.
            verbose: Print progress information while simulating.
            selection_params: Optional 11-item sequence unpacked as
                (stop_dist, mature_affy, naive_affy, target_dist,
                target_count, skip_update, A_total, B_total, Lp, k,
                outbase). Requires T.

        Returns:
            The root TreeNode of the uncollapsed tree. Unobserved lineages
            are pruned, unobserved unifurcations removed and every node is
            renamed 'simcell_<i>'.

        Raises:
            ValueError: If the combination of N, T and n is invalid.
            RuntimeError: If the tree dies out before reaching N, ends
                before the requested sample time, or does not contain
                enough leaves for the requested sample size.

        Side effects:
            With selection_params, the per-generation hamming distance
            histograms are pickled to outbase + '_selection_runstats.p'.
        '''
        progeny = poisson(lambda_)  # Default progeny distribution
        stop_dist = None  # Default stopping criterium for affinity simulation
        # Checking the validity of the input parameters:
        if N is not None and T is not None:
            raise ValueError(
                'Only one of N and T can be used. One must be None.')
        if selection_params is not None and T is None:
            raise ValueError(
                'Simulation with selection was chosen. A time, T, must be specified.'
            )
        elif N is None and T is None:
            raise ValueError('Either N or T must be specified.')
        if N is not None and n is not None and n[-1] > N:
            raise ValueError('n ({}) must not larger than N ({})'.format(
                n[-1], N))
        elif N is not None and n is not None and len(n) != 1:
            raise ValueError(
                'n ({}) must a single value when specifying N'.format(n))
        if T is not None and len(T) > 1 and (n is None or
                                             (len(n) != 1
                                              and len(n) != len(T))):
            raise ValueError(
                'n must be specified when using intermediate sampling:', n)
        elif T is not None and len(T) > 1 and len(n) == 1:
            # Broadcast the single sample size to every sampling time:
            n = [n[-1]] * len(T)

        # Planting the tree:
        tree = TreeNode()
        tree.dist = 0
        tree.add_feature('sequence', sequence)
        tree.add_feature('terminated', False)
        tree.add_feature('sampled', False)
        tree.add_feature('frequency', 0)
        tree.add_feature('time', 0)

        if selection_params is not None:
            hd_generation = list(
            )  # Collect an array of the counts of each hamming distance at each time step
            stop_dist, mature_affy, naive_affy, target_dist, target_count, skip_update, A_total, B_total, Lp, k, outbase = selection_params
            # Make a list of target sequences:
            targetAAseqs = [
                self.one_mutant(sequence, target_dist)
                for i in range(target_count)
            ]
            # Assert that the target sequences are comparable to the naive sequence:
            aa = translate(tree.sequence)
            assert (sum([1 for t in targetAAseqs if len(t) != len(aa)]) == 0
                    )  # All targets are same length
            # NOTE(review): this only checks that at least one target is
            # "target_dist" away from the naive sequence, not all of them.
            # Confirm which of the two is intended.
            assert (sum([
                1 for t in targetAAseqs
                if hamming_distance(aa, t) == target_dist
            ]))
            # Affinity is an exponential function of hamming distance:
            assert (target_dist > 0)

            def hd2affy(hd):
                return (mature_affy + hd**k *
                        (naive_affy - mature_affy) / target_dist**k)

            # We store both the amino acid sequence and the affinity as tree features:
            tree.add_feature('AAseq', str(aa))
            tree.add_feature(
                'Kd', selection_utils.calc_Kd(tree.AAseq, targetAAseqs,
                                              hd2affy))
            tree.add_feature(
                'target_dist',
                min([
                    hamming_distance(tree.AAseq, taa) for taa in targetAAseqs
                ]))

        t = 0  # <-- Time at start
        leaves_unterminated = 1
        # Small lambdas are causing problems so make a minimum:
        lambda_min = 10e-10
        hd_distrib = []
        # Keep going while there are live leaves, the size limit N (if any)
        # is not reached, the last sampling time (if any) is not reached and
        # the stop distance criterion (if any) still holds:
        while leaves_unterminated > 0 and (
                leaves_unterminated < N if N is not None else
                True) and (t < max(T) if T is not None else True) and (
                    stop_dist >= min(hd_distrib)
                    if stop_dist is not None and t > 0 else True):
            if verbose:
                print('At time:', t)
            t += 1
            # Sample intermediate time point:
            if T is not None and len(T) > 1 and (t - 1) in T:
                si = T.index(t - 1)
                live_nostop_leaves = [
                    l for l in tree.iter_leaves()
                    if not l.terminated and not has_stop(l.sequence)
                ]
                random.shuffle(live_nostop_leaves)
                if len(live_nostop_leaves) < n[si]:
                    raise RuntimeError(
                        'tree with {} leaves, less than what desired for intermediate sampling {}. Try later generation or increasing the carrying capacity.'
                        .format(leaves_unterminated, n))
                # Make the sample and kill the cells sampled:
                for leaf in live_nostop_leaves[:n[si]]:
                    leaves_unterminated -= 1
                    leaf.sampled = True
                    leaf.terminated = True
                if verbose:
                    print('Made an intermediate sample at time:', t - 1)
            live_leaves = [l for l in tree.iter_leaves() if not l.terminated]
            random.shuffle(live_leaves)
            skip_lambda_n = 0  # At every new round reset the all the lambdas
            # Draw progeny for each leaf:
            for leaf in live_leaves:
                if selection_params is not None:
                    if skip_lambda_n == 0:
                        skip_lambda_n = skip_update + 1  # Add one so skip_update=0 is no skip
                        tree = selection_utils.lambda_selection(
                            tree, targetAAseqs, hd2affy, A_total, B_total, Lp)
                    if leaf.lambda_ > lambda_min:
                        progeny = poisson(leaf.lambda_)
                    else:
                        progeny = poisson(lambda_min)
                    skip_lambda_n -= 1
                n_children = progeny.rvs()
                leaves_unterminated += n_children - 1  # <-- Getting 1, is equal to staying alive
                if not n_children:
                    leaf.terminated = True
                for child_count in range(n_children):
                    # If sequence pair mutate them separately with their own mutation rate:
                    if pair_bounds is not None:
                        mutated_sequence1 = self.mutate(
                            leaf.sequence[pair_bounds[0][0]:pair_bounds[0][1]],
                            lambda0=lambda0[0])
                        mutated_sequence2 = self.mutate(
                            leaf.sequence[pair_bounds[1][0]:pair_bounds[1][1]],
                            lambda0=lambda0[1])
                        mutated_sequence = mutated_sequence1 + mutated_sequence2
                    else:
                        mutated_sequence = self.mutate(leaf.sequence,
                                                       lambda0=lambda0[0])
                    child = TreeNode()
                    # Branch length is the number of positions that differ
                    # from the parent sequence:
                    child.dist = sum(
                        x != y
                        for x, y in zip(mutated_sequence, leaf.sequence))
                    child.add_feature('sequence', mutated_sequence)
                    if selection_params is not None:
                        aa = translate(child.sequence)
                        child.add_feature('AAseq', str(aa))
                        child.add_feature(
                            'Kd',
                            selection_utils.calc_Kd(child.AAseq, targetAAseqs,
                                                    hd2affy))
                        child.add_feature(
                            'target_dist',
                            min([
                                hamming_distance(child.AAseq, taa)
                                for taa in targetAAseqs
                            ]))
                    child.add_feature('frequency', 0)
                    child.add_feature('terminated', False)
                    child.add_feature('sampled', False)
                    child.add_feature('time', t)
                    leaf.add_child(child)
            if selection_params is not None:
                # Minimum distance to any target for every live leaf:
                hd_distrib = [
                    min([
                        hamming_distance(tn.AAseq, ta) for ta in targetAAseqs
                    ]) for tn in tree.iter_leaves() if not tn.terminated
                ]
                if target_dist > 0:
                    hist = scipy.histogram(hd_distrib,
                                           bins=list(range(target_dist * 10)))
                else:  # Just make a minimum of 10 bins
                    hist = scipy.histogram(hd_distrib, bins=list(range(10)))
                hd_generation.append(hist)
                if verbose and hd_distrib:
                    print('Total cell population:', sum(hist[0]))
                    print('Majority hamming distance:', scipy.argmax(hist[0]))
                    print('Affinity of latest sampled leaf:', leaf.Kd)
                    print(
                        'Progeny distribution lambda for the latest sampled leaf:',
                        leaf.lambda_)

        # Only meaningful when a target size was requested; comparing an int
        # with N=None (time-based runs) raises TypeError on Python 3.
        if N is not None and leaves_unterminated < N:
            raise RuntimeError(
                'Tree terminated with {} leaves, {} desired'.format(
                    leaves_unterminated, N))

        # Keep a histogram of the hamming distances at each generation:
        if selection_params is not None:
            with open(outbase + '_selection_runstats.p', 'wb') as f:
                pickle.dump(hd_generation, f)

        # Each leaf in final generation gets an observation frequency of 1, unless downsampled:
        if T is not None and len(T) > 1:
            # Iterate the intermediate time steps (excluding the last time):
            for Ti in sorted(T)[:-1]:
                si = T.index(Ti)
                # Only sample those that have been 'sampled' at intermediate sampling times:
                final_leaves = [
                    leaf for leaf in tree.iter_descendants()
                    if leaf.time == Ti and leaf.sampled
                ]
                if len(final_leaves) < n[si]:
                    raise RuntimeError(
                        'tree terminated with {} leaves, less than what desired after downsampling {}'
                        .format(leaves_unterminated, n[si]))
                for leaf in final_leaves:  # No need to down-sample, this was already done in the simulation loop
                    leaf.frequency = 1
        if selection_params and max(T) != t:
            raise RuntimeError(
                'tree terminated with before the requested sample time.')
        # Do the normal sampling of the last time step:
        final_leaves = [
            leaf for leaf in tree.iter_leaves()
            if leaf.time == t and not has_stop(leaf.sequence)
        ]
        # Report stop codon sequences:
        stop_leaves = [
            leaf for leaf in tree.iter_leaves()
            if leaf.time == t and has_stop(leaf.sequence)
        ]
        if stop_leaves:
            print(
                'Tree contains {} leaves with stop codons, out of {} total at last time point.'
                .format(len(stop_leaves), len(final_leaves)))

        # Index into n of the sample size that belongs to the last time point:
        if T is not None:
            si = T.index(sorted(T)[-1])
        else:
            si = 0
        # By default, downsample to the target simulation size:
        if n is not None and len(final_leaves) >= n[si]:
            for leaf in random.sample(final_leaves, n[si]):
                leaf.frequency = 1
        elif n is None and N is not None:
            if len(
                    final_leaves
            ) < N:  # Removed nonsense sequences might decrease the number of final leaves to less than N
                N = len(final_leaves)
            for leaf in random.sample(final_leaves, N):
                leaf.frequency = 1
        elif N is None and T is not None:
            for leaf in final_leaves:
                leaf.frequency = 1
        elif n is not None and len(final_leaves) < n[si]:
            raise RuntimeError(
                'tree terminated with {} leaves, less than what desired after downsampling {}'
                .format(leaves_unterminated, n[si]))
        else:
            raise RuntimeError('Unknown option.')

        # Prune away lineages that are unobserved:
        for node in tree.iter_descendants():
            if sum(node2.frequency for node2 in node.traverse()) == 0:
                node.detach()

        # Remove unobserved unifurcations:
        for node in tree.iter_descendants():
            parent = node.up
            if node.frequency == 0 and len(node.children) == 1:
                node.delete(prevent_nondicotomic=False)
                # The surviving child now hangs off `parent`, so its branch
                # length is recomputed against the parent sequence:
                node.children[0].dist = hamming_distance(
                    node.children[0].sequence, parent.sequence)

        # Assign unique names to each node:
        for i, node in enumerate(tree.traverse(), 1):
            node.name = 'simcell_{}'.format(i)

        # Return the uncollapsed tree:
        return tree

Beispiel #25

0

Datei anzeigen

Datei: utils.py Projekt: alexanderwhatley/Stanford-Project-Code

def build_tree(fcs_paths, num_neighbors, prop_filter=0.1):
    '''
    Build a layered cluster tree from a series of clustered FCS files.

    fcs_paths: dictionary of (cluster numbers, path); it is indexed with
        0 .. len(num_neighbors) - 1, one entry per tree layer
    num_neighbors: number of neighbors used in X-shift; only its length is
        used here, to decide how many layers to add
    prop_filter: proportion of cells for edge between clusters to be created

    Returns the root TreeNode (name 0) with all layers attached.
    '''

    def coords_by_cluster(cluster_data):
        # Map every cluster id to the coordinate/sample rows of its cells.
        coords = cluster_data[['cluster_id', 'sample', 'X', 'Y', 'Z']]
        return {
            group: coords.loc[inds, ['X', 'Y', 'Z', 'sample']]
            for group, inds in coords.groupby('cluster_id').groups.items()
        }

    # first initialize tree with 1 node at top and its children
    tree = TreeNode(name=0)
    leaves = {0: tree}
    _, cluster_data_child = fcsparser.parse(fcs_paths[0])
    cluster_data_child = process_fcs(cluster_data_child)
    tree.add_features(coords=cluster_data_child[['X', 'Y', 'Z']])
    tree.add_features(cluster_id=0)
    child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
    child_coords = coords_by_cluster(cluster_data_child)
    clusters = list(child_cluster_counts.keys())
    child_cluster_counts /= child_cluster_counts.sum()
    # Series.items() replaces Series.iteritems(), which newer pandas no
    # longer provides. Every first-layer cluster hangs off the root (id 0).
    proportions = {
        child_node_id: {0: val}
        for child_node_id, val in child_cluster_counts.items()
    }
    # set proportion filter to 0 for first layer, as everything is a child of the vertex
    tree, leaves = add_tree_layer(tree,
                                  leaves,
                                  clusters,
                                  proportions,
                                  child_coords,
                                  prop_filter=0)

    # build the rest of the tree: layer ind is the parent of layer ind + 1
    for ind in range(len(num_neighbors) - 1):
        _, cluster_data_parent = fcsparser.parse(fcs_paths[ind])
        _, cluster_data_child = fcsparser.parse(fcs_paths[ind + 1])
        cluster_data_parent = process_fcs(cluster_data_parent)
        cluster_data_child = process_fcs(cluster_data_child)

        child_cluster_counts = cluster_data_child['cluster_id'].value_counts()
        clusters = list(child_cluster_counts.keys())
        match_columns = ['X', 'Y', 'Z', 'cluster_id']
        match_data_parent = cluster_data_parent[match_columns].astype(int)
        match_data_child = cluster_data_child[match_columns].astype(int)
        # Rows are matched between the two layers on their integer
        # coordinates; the merge suffixes the cluster ids with _x (parent)
        # and _y (child).
        merged = pd.merge(match_data_parent,
                          match_data_child,
                          on=['X', 'Y', 'Z'])
        parent_clusters = merged['cluster_id_x'].tolist()
        child_clusters = merged['cluster_id_y'].tolist()
        child_coords = coords_by_cluster(cluster_data_child)
        # proportions[child][parent]: each matched row adds
        # 1 / (row count of the child cluster)
        proportions = defaultdict(Counter)
        for parent_cluster, child_cluster in zip(parent_clusters,
                                                 child_clusters):
            proportions[child_cluster][
                parent_cluster] += 1 / child_cluster_counts[child_cluster]
        tree, leaves = add_tree_layer(tree, leaves, clusters, proportions,
                                      child_coords, prop_filter)

    return tree

Beispiel #26

0

Datei anzeigen

def create_tree_node(seq, frequency=0):
    """Return a new, unattached TreeNode carrying a sequence and a frequency.

    seq is stored as the node feature 'sequence' and frequency (default 0)
    as the node feature 'frequency'.
    """
    node = TreeNode()
    for feature_name, feature_value in (('sequence', seq),
                                        ('frequency', frequency)):
        node.add_feature(feature_name, feature_value)
    return node

Beispiel #27

0

Datei anzeigen

#     def name(self):
#         return self._name

#     def parent(self):
#         return self._parent

#     def assignParent(self, parent):
#         if( self._parent is None):
#             self._parent = parent
#         elif self._parent == parent:
#             return
#         else:
#             raise Exception("mismatched parents")

# Root node, plus a name -> node lookup table seeded with that root.
G = TreeNode(name=u'cellular organisms')
nodes = {G.name: G}

# Node style with small circular markers.
nstyle = NodeStyle()
nstyle['size'] = 3
nstyle['shape'] = 'circle'


def layout(node):
    """Label nodes that have fewer than four ancestors with their name.

    For such a node the name is printed and a floating AttrFace showing the
    "name" attribute is added in column 0. Other nodes are left untouched.
    """
    if len(node.get_ancestors()) >= 4:
        return

    print(node.name)
    name_face = AttrFace("name", fsize=9)
    name_face.margin_top = 10
    name_face.margin_bottom = 0
    name_face.margin_left = 10
    faces.add_face_to_node(name_face, node, 0, position="float")

Beispiel #28

0

Datei anzeigen

    if (args['--format']):
        ShowFormat()
        sys.exit(-1)

    basehtml = args['--html'] if args['--html'] else 'base.html'

    from ete3 import Tree, TreeNode
    #read ped file from stdin.
    ped_data = {}  #map for name -> raw data.
    node_data = {}  #map for name -> TreeNode
    for line in sys.stdin:
        line = line.strip()
        if line and line[0] != '#':  #skip comment line.
            ss = line.split()
            ped_data[ss[1]] = ss
            n = TreeNode(name=ss[1])
            n.add_feature('raw', ss)
            node_data[ss[1]] = n

    # for k,v in node_data.items():
    #     print(v.write(format=2,features=['raw']))

    #find the root node, and convert results to josn.
    #Check data integrity.
    m_error = False
    for _, data in ped_data.items():
        if data[2] != '0' and data[2] not in ped_data.keys():
            m_error = True
            sys.stderr.write('ERROR: missing declearation for father: %s\n' %
                             (data[2]))
        if data[3] != '0' and data[3] not in ped_data.keys():

Beispiel #29

0

Datei anzeigen

def subdivideSpTree(spTree):
    """
    Takes:
        - spTree (ete3.Tree) : an ULTRAMETRIC species tree

    Returns:
        (ete3.Tree) : subdivided species tree where all nodes have a timeSlice feature
        or
        None if the species tree is not ultrametric

    The input tree is not modified; all work is done on a deep copy.
    """
    newSpTree = deepcopy(spTree)

    featureName = "timeSlice"

    ##1/ getting distance from root.
    Dheight = getDistFromRootDic(newSpTree, checkUltrametric=True)

    if Dheight is None:
        # Parenthesised single-argument print works as a statement on
        # Python 2 and as a function call on Python 3.
        print("!!ERROR!! : the species tree is not ultrametric")
        return None

    # we know that there is n-1 internal nodes (where n is the number of leaves)
    # hence the maximal timeSlice is n-1 (all leaves have timeSlice 0)

    ##2/assign timeSlice to nodes
    currentTS = len(newSpTree.get_leaves()) - 1

    # Nodes are visited by increasing distance from the root. Ties are broken
    # on id() so that node objects themselves never have to be compared; the
    # resulting order among ties is arbitrary but well defined.
    byHeight = sorted(Dheight.items(), key=lambda kv: (kv[1], id(kv[0])))
    for n, _height in byHeight:
        n.add_feature(featureName, currentTS)

        # once 0 is reached, every remaining node also gets timeSlice 0
        if currentTS != 0:
            currentTS -= 1

    ##3/subdivide according to timeSlice
    # Snapshot the nodes first: the loop below inserts new nodes into the tree.
    RealNodes = list(newSpTree.traverse())

    for n in RealNodes:
        if n.is_root():
            continue

        # number of time slices skipped between n and its parent
        nodeToAdd = n.up.timeSlice - n.timeSlice - 1

        while nodeToAdd > 0:
            # n.up is re-read every round: after the first insertion it is the
            # previously inserted node, so the new nodes form a chain.
            parentNode = n.up

            n.detach()

            NullNode = TreeNode()
            NullNode.add_feature(featureName, parentNode.timeSlice - 1)

            if "dead" in n.features:
                NullNode.add_feature("dead", n.dead)

            parentNode.add_child(NullNode)
            NullNode.add_child(n)
            nodeToAdd -= 1

    return newSpTree

Beispiel #30

0

Datei anzeigen

def _find_date_ci(text):
    """Return the first date confidence interval found in text, or None.

    The LSD pattern is tried first and the PastML pattern second.
    """
    ci = next(iter(re.findall(CI_DATE_REGEX_LSD, text)), None)
    if ci is None:
        ci = next(iter(re.findall(CI_DATE_REGEX_PASTML, text)), None)
    return ci


def parse_nexus(tree_path, columns=None):
    """Read the trees of a nexus file and convert them to TreeNode trees.

    Every clade becomes a TreeNode. Dates, date confidence intervals and,
    when columns is given, per-column value sets found in the clade
    annotations are stored as node features.

    :param tree_path: passed to read_nexus to obtain the trees
    :param columns: optional iterable of column names to extract from the
        clade comments
    :return: list with one root TreeNode per tree that was read
    """
    trees = []
    for nex_tree in read_nexus(tree_path):
        # Iterative traversal over (clade, already converted parent) pairs.
        todo = [(nex_tree.root, None)]
        tree = None
        while todo:
            clade, parent = todo.pop()
            # A missing or non-numeric branch length falls back to 0.
            try:
                dist = float(clade.branch_length)
            except (AttributeError, TypeError, ValueError):
                dist = 0
            name = getattr(clade, 'name', None)
            if not name:
                # Fall back to the confidence field, but only if it is text.
                name = getattr(clade, 'confidence', None)
                if not isinstance(name, str):
                    name = None
            node = TreeNode(dist=dist, name=name)
            if parent is None:
                tree = node
            else:
                parent.add_child(node)

            # Parse LSD2 dates and CIs, and PastML columns
            date, ci = None, None
            columns2values = defaultdict(set)
            comment = getattr(clade, 'comment', None)
            if isinstance(comment, str):
                date = next(iter(re.findall(DATE_COMMENT_REGEX, comment)),
                            None)
                ci = _find_date_ci(comment)
                if columns:
                    for column in columns:
                        values = set()
                        for match in re.findall(
                                COLUMN_REGEX_PASTML.format(column=column),
                                comment):
                            values.update(match.split('|'))
                        if values:
                            columns2values[column] |= values
            # For the root, the CI is also looked for in a textual branch
            # length.
            comment = getattr(clade, 'branch_length', None)
            if not ci and not parent and isinstance(comment, str):
                ci = _find_date_ci(comment)
            # Last resort: a textual confidence field.
            comment = getattr(clade, 'confidence', None)
            if ci is None and isinstance(comment, str):
                ci = _find_date_ci(comment)
            if date is not None:
                try:
                    node.add_feature(DATE, float(date))
                except (TypeError, ValueError):
                    # non-numeric date match: leave the node undated
                    pass
            if ci is not None:
                try:
                    node.add_feature(DATE_CI, [float(_) for _ in ci])
                except (TypeError, ValueError):
                    # non-numeric CI match: leave the node without a CI
                    pass
            if columns2values:
                for c, vs in columns2values.items():
                    node.add_feature(c, vs)
            todo.extend((c, node) for c in clade.clades)

        # Zero-length branches: children without their own date/CI take
        # the ones of their parent...
        for n in tree.traverse('preorder'):
            date, ci = getattr(n, DATE, None), getattr(n, DATE_CI, None)
            if date is not None or ci is not None:
                for c in n.children:
                    if c.dist == 0:
                        if getattr(c, DATE, None) is None:
                            c.add_feature(DATE, date)
                        if getattr(c, DATE_CI, None) is None:
                            c.add_feature(DATE_CI, ci)
        # ...and parents without their own date/CI take the ones of a child.
        for n in tree.traverse('postorder'):
            date, ci = getattr(n, DATE, None), getattr(n, DATE_CI, None)
            if not n.is_root() and n.dist == 0 and (date is not None
                                                    or ci is not None):
                if getattr(n.up, DATE, None) is None:
                    n.up.add_feature(DATE, date)
                if getattr(n.up, DATE_CI, None) is None:
                    n.up.add_feature(DATE_CI, ci)

        # propagate dates up to the root if needed
        if getattr(tree, DATE, None) is None:
            dated_node = next((n for n in tree.traverse()
                               if getattr(n, DATE, None) is not None), None)
            if dated_node:
                # Walk up from the first dated node; an undated ancestor gets
                # the date of the node below it minus that node's branch
                # length.
                while dated_node != tree:
                    if getattr(dated_node.up, DATE, None) is None:
                        dated_node.up.add_feature(
                            DATE,
                            getattr(dated_node, DATE) - dated_node.dist)
                    dated_node = dated_node.up

        trees.append(tree)
    return trees