Example #1
0
def _add_observed_isotypes(
    tree: ete3.Tree,
    newidmap: Dict[str, str],
    isotype_order: Sequence[str],
    weight_matrix: Optional[Sequence[Sequence[float]]] = None,
):
    """Attach an ``isotype`` feature to every node of ``tree``, in place.

    The tree is modified in three passes:

    1. Each non-root internal node with positive abundance gets a new leaf
       child carrying its name, sequence and abundance; the internal node's
       own abundance is reset to 0.
    2. Each leaf with positive abundance is looked up in ``newidmap`` by
       name. The looked-up value is used as a mapping from isotype name to a
       collection of cell IDs. A leaf with several isotypes receives one new
       child per isotype (abundance = number of cell IDs) and its own
       abundance is reset to 0; a leaf with a single isotype is labelled
       directly. Leaves with abundance 0 get the ambiguous isotype ``"?"``.
    3. Every node that is not a leaf after the first two passes gets the
       ambiguous isotype ``"?"``.

    Args:
        tree: Tree whose nodes carry ``abundance`` and ``sequence`` features.
        newidmap: Mapping from node name to ``{isotype: cell ids}``.
        isotype_order: Passed through to ``IsotypeTemplate``.
        weight_matrix: Passed through to ``IsotypeTemplate``.

    Warns:
        When an observed leaf name is absent from ``newidmap``, or when some
        of its cell IDs are mapped to the ``"?"`` isotype.
    """
    make_isotype = IsotypeTemplate(isotype_order,
                                   weight_matrix=weight_matrix).new

    # Pass 1: push observed internal nodes down as leaves.
    for inner in list(tree.iter_descendants()):
        if not (inner.abundance > 0) or inner.is_leaf():
            continue
        observed_leaf = ete3.TreeNode(name=inner.name)
        observed_leaf.add_feature("sequence", inner.sequence)
        observed_leaf.add_feature("abundance", inner.abundance)
        inner.abundance = 0
        inner.add_child(child=observed_leaf)

    # Pass 2: split leaves that stand for several isotypes.
    for node in list(tree.get_leaves()):
        if node.abundance == 0:
            node.add_feature("isotype", make_isotype("?"))
            continue

        try:
            thisnode_isotypemap = newidmap[node.name]
        except KeyError as e:
            warnings.warn(
                f"The sequence name {e} labels an observed node, but no mapping to an original sequence ID was found."
                " Isotype will be assumed ambiguous.")
            # Fabricate one placeholder ID per observed cell.
            thisnode_isotypemap = {
                "?": {f"Unknown_id_{n+1}"
                      for n in range(node.abundance)}
            }

        if "?" in thisnode_isotypemap:
            warnings.warn(
                f"The sequence name {node.name} labels an observed node, and corresponds to sequence IDs for "
                "which no observed isotype was provided. "
                f" Isotype will be assumed ambiguous for: {', '.join(thisnode_isotypemap['?'])}"
            )

        if len(thisnode_isotypemap) <= 1:
            node.isotype = make_isotype(list(thisnode_isotypemap.keys())[0])
            continue

        # The per-isotype copies hang below this leaf rather than beside it,
        # so that parsimony is preserved when node.up has a different
        # sequence from node.
        for isotype, cell_ids in thisnode_isotypemap.items():
            isotype_leaf = ete3.TreeNode(name=node.name)
            isotype_leaf.add_feature("abundance", len(cell_ids))
            isotype_leaf.add_feature("sequence", node.sequence)
            isotype_leaf.add_feature("isotype", make_isotype(isotype))
            node.add_child(child=isotype_leaf)
        node.abundance = 0

    # Pass 3: everything that is now internal is ambiguous.
    for any_node in tree.traverse():
        if not any_node.is_leaf():
            any_node.add_feature("isotype", make_isotype("?"))
Example #2
0
def create_nodes(node_names):
    """Build one ``ete3.TreeNode`` per name, in the order the names are given.

    Returns:
        A new list of unattached nodes, each named after the corresponding
        entry of ``node_names``.
    """
    return [ete3.TreeNode(name=label) for label in node_names]
Example #3
0
def constrain_main(args):
    """Build a constraint tree for the species in ``args.species_list`` and write it.

    The backbone is selected by ``args.backbone``:

    * ``'ncbi'``: the tree is built from the lineages of the labels.
    * a value ending in ``'user'``: the tree is read from ``args.infile``
      and underscores in node names are replaced by spaces.
    * ``'ncbi_apgiv'``: the tree is loaded from the packaged
      ``data_tree/apgiv.nwk`` resource.

    For the last two, the tree is then matched against the labels, unmatched
    leaves are deleted and one-to-many matches are turned into polytomies.
    In every case singleton nodes are removed, spaces in node names become
    underscores and the tree is written with ``format=9``.

    Raises:
        ValueError: if ``args.backbone`` is none of the supported values.
    """
    labels = read_item_per_line_file(args.species_list)
    if args.backbone == 'ncbi':
        lineages = get_lineages(labels=labels, rank=args.rank)
        taxid_counts = get_taxid_counts(lineages)
        tree = taxid2tree(lineages, taxid_counts)
    else:
        if args.backbone.endswith('user'):
            tree = read_tree(args.infile, args.format)
            for node in tree.traverse():
                node.name = node.name.replace('_', ' ')
        elif args.backbone == 'ncbi_apgiv':
            file_path = 'data_tree/apgiv.nwk'
            nwk_string = pkg_resources.resource_string(
                __name__, file_path).decode("utf-8")
            tree = ete3.TreeNode(newick=nwk_string,
                                 format=0,
                                 quoted_node_names=True)
        else:
            # Previously this fell through with `tree` unbound and crashed
            # with an UnboundLocalError on the next statement.
            raise ValueError(
                "Unsupported backbone: {!r} (expected 'ncbi', 'ncbi_apgiv' "
                "or a value ending in 'user').".format(args.backbone))
        tree = initialize_tree(tree)
        tree = match_taxa(tree, labels, args.backbone)
        tree = delete_nomatch_leaves(tree)
        tree = polytomize_one2many_matches(tree)
    tree = remove_singleton(tree, verbose=False, preserve_branch_length=False)
    for node in tree.traverse():
        node.name = node.name.replace(' ', '_')
    write_tree(tree, args, format=9)
Example #4
0
 def visualize(self):
     """Return an ete3 node named after ``self.splitFeature``.

     The node's children are the ``visualize()`` results of every entry of
     ``self.leafes`` followed by every entry of ``self.branches``.
     """
     root = ete3.TreeNode(name=self.splitFeature)
     for leaf in self.leafes.values():
         root.add_child(leaf.visualize())
     for branch in self.branches.values():
         root.add_child(branch.visualize())
     return root
Example #5
0
def main(name='phytree_show', args=None):
    """Render or interactively show a Newick tree when invoked as ``phytree_show``.

    Args:
        name: Sub-command name; nothing is done unless it equals
            ``'phytree_show'``.
        args: Forwarded to ``getargs``, which returns the eleven options
            unpacked below.

    Returns:
        Always 0, including when rendering or showing fails (the error is
        printed instead of being raised).
    """
    myname = 'phytree_show'
    if name != myname:
        return 0

    (nwkfile, tree_format, tree_mode, show_leaf_name, show_branch_length,
     show_branch_support, align_leaf_name, hide_inner_node, save_plot,
     show_inner_name, out_file) = getargs(args)

    tree = ete3.TreeNode(nwkfile, format=tree_format)
    tree_style = ete3.TreeStyle()
    tree_style.mode = tree_mode
    tree_style.show_leaf_name = show_leaf_name
    tree_style.show_branch_length = show_branch_length
    # This option was parsed but never applied to the style before.
    tree_style.show_branch_support = show_branch_support
    if align_leaf_name:
        # Aligned names are drawn by _align_leaf_name, so the built-in
        # leaf-name rendering is switched off.
        tree_style.show_leaf_name = False
        _align_leaf_name(tree)
    if hide_inner_node:
        _hide_inner_node(tree)
    if show_inner_name:
        _show_inner_name(tree, tree_format)
    try:
        if save_plot:
            tree.render(out_file, tree_style=tree_style)
        else:
            tree.show(tree_style=tree_style)
    except Exception as exc:
        # Best-effort display: report the failure rather than crash, but let
        # KeyboardInterrupt/SystemExit propagate.
        print('opps... {!r}'.format(exc))
    return 0
Example #6
0
 def traverse_tree(circle_node, parent_node):
     """Recursively mirror the descendants of ``circle_node`` under ``parent_node``.

     For each child returned by ``circle_node.get_children()`` a new
     ``ete3.TreeNode`` is created with the child's ``name`` and with ``dist``
     set to the child's ``base_dist``, attached to ``parent_node``, and then
     filled in the same way. Always returns 0.
     """
     for source_child in circle_node.get_children():
         mirrored = ete3.TreeNode()
         mirrored.name = source_child.name
         mirrored.dist = source_child.base_dist
         traverse_tree(source_child, parent_node.add_child(mirrored))
     return 0
Example #7
0
def sample(treestring, n=20):
    """Disambiguate the tree in ``treestring`` ``n`` times and collect the results.

    The tree is parsed once from ``treestring`` (Newick ``format=1``). For
    each ``i`` in ``range(n)`` a copy is disambiguated by
    ``pps.disambiguate`` using the state of ``random`` seeded with ``i``,
    and its ``treeprint`` rendering is added to the result.

    Args:
        treestring: Newick description of the tree to sample from.
        n: Number of seeded disambiguations to run.

    Returns:
        The set of distinct ``treeprint`` outputs.
    """
    # The parameter used to be ignored in favour of the global newick_tree2.
    ogt = ete3.TreeNode(newick=treestring, format=1)
    print(ogt.sequence)
    newickset = set()
    for i in range(n):
        t = ogt.copy()
        random.seed(i)
        t = pps.disambiguate(t, random_state=random.getstate())
        newickset.add(treeprint(t))
    return newickset
Example #8
0
def break_branches(tree, step=1):
    """Split, in place, every branch longer than ``step`` into ``step``-long pieces.

    For each child of each traversed node, the branch length is reduced to
    its remainder modulo ``step``, and one new ``ete3.TreeNode`` of length
    ``step`` is stacked above it per whole multiple of ``step``. The top of
    the resulting chain is re-attached to the original parent, so a broken
    child ends up last among its siblings.
    """
    for parent in tree.traverse():
        for child in parent.get_children():
            n_segments, remainder = divmod(child.dist, step)
            child.dist = remainder
            chain_top = child
            for _ in range(int(n_segments)):
                segment = ete3.TreeNode(dist=step)
                # detach() unhooks the current top from whatever holds it, so
                # the new segment can become its parent.
                segment.add_child(chain_top.detach())
                chain_top = segment
            if n_segments:
                parent.add_child(chain_top)
Example #9
0
    def make_profile_tree(self):
        """Return an ``ete3`` copy of the tree rooted at ``self.get_root_circle_node()``.

        Every node of the result takes its ``name`` from the matching source
        node. The root has ``dist`` 0; each other node's ``dist`` is the
        source node's ``base_dist``.
        """
        def mirror_children(source, target):
            # Depth-first copy that keeps the child order of the source.
            for source_child in source.get_children():
                clone = ete3.TreeNode()
                clone.name = source_child.name
                clone.dist = source_child.base_dist
                target.add_child(clone)
                mirror_children(source_child, clone)
            return 0

        source_root = self.get_root_circle_node()
        tree_root = ete3.TreeNode()
        tree_root.dist = 0
        tree_root.name = source_root.name
        mirror_children(source_root, tree_root)
        return tree_root
Example #10
0
def alelabel_to_events(label, current, number_to_names):
    """Turn a label of chained events into a chain of nodes below ``current``.

    The label is consumed from the left, one ``REGEX_EVENT`` match at a
    time (a leading ``'.'`` is stripped from each event). Each event creates
    one node, a child of the previous one, annotated as follows:

    * no ``REGEX_TRANSFER`` match: ``D=0`` (speciation, possibly with loss);
      the event text itself is the branch.
    * ``'D@'``: ``D=2``.
    * ``'T@'``: ``D=11, T=1`` (transfer out).
    * ``'@'``: ``D=0, T=-1`` (transfer into the branch).
    * ``'Tb@'``: ``D=12, T=0``.

    Each node also gets ``S``: the branch translated through
    ``number_to_names``, or the branch itself when it has no translation.

    Args:
        label: Event string to parse.
        current: Node to extend, or ``None`` to start a new tree.
        number_to_names: Mapping from branch identifier to species name.

    Returns:
        ``(last_node, n_new)``: the deepest node created and the number of
        nodes created. For an empty label this is ``(current, 0)``.

    Raises:
        ValueError: if the remaining label cannot be matched by
            ``REGEX_EVENT``, or is matched with zero length.
    """
    parent_S = (None if current is None else current.S)
    n_new = 0  # DEBUG
    # Defined up front so that the final log call also works for an empty label.
    branch = None
    while label:
        logger.debug("label='%s'", label)
        event_match = REGEX_EVENT.match(label)
        if event_match is None or event_match.end() == 0:
            # Without this guard a non-match raised a cryptic AttributeError
            # and a zero-length match looped forever.
            raise ValueError("Cannot parse an event from label %r" % (label,))
        event = event_match.group()
        if event.startswith('.'):
            event = event[1:]
        label = label[event_match.end():]
        try:
            current = current.add_child()
        except AttributeError:
            assert current is None, "parent node should be ete3.TreeNode or None."
            current = ete3.TreeNode()
        n_new += 1
        m = REGEX_TRANSFER.match(event)
        if not m:
            # Speciation and loss. i.e. transmission to one single descendant species.
            current.add_features(D=0)  # Speciation (and loss)
            branch = event
        else:
            typ, time, branch = m.groups()
            if typ == 'D@':
                current.add_features(D=2)
            elif typ == 'T@':
                # Transfer out
                # should be represented as dupli
                current.add_features(D=11, T=1)
            elif typ == '@':
                # Transfer into that branch:
                # we will represent it simply as a speciation node.
                current.add_features(D=0, T=-1)
            elif typ == 'Tb@':
                # Is it a gene divergence in non represented lineages?
                # Could be represented as Duplication or Speciation, does not matter.
                current.add_features(D=12, T=0)  # Should S==branch?
        current.add_feature('S', number_to_names.get(branch, branch))
    logger.debug('From %r: created %d new nodes leading to species %s',
                 parent_S, n_new, branch)
    return current, n_new
Example #11
0
def parentdata_to_ete3(df,
                       dist_column='dist',
                       root_value=None):
    """Build ``ete3`` trees from the parent/children pairs of ``df``.

    ``roll_leafwards_indices(df, root_value=root_value)`` is consumed as a
    stream of ``(node name, children names)`` pairs. A node name that has
    not already been created as someone's child starts a new tree.

    Args:
        df: Table indexed by node name; read through ``df.loc``.
        dist_column: Column holding branch lengths. When ``None`` every
            branch length is 1. NaN lengths are replaced by 0.
        root_value: Forwarded to ``roll_leafwards_indices``.

    Returns:
        The list of root nodes, in order of first appearance.
    """

    def get_dist(node):
        if dist_column is None:
            return 1
        dist = df.loc[node, dist_column]
        # NaN creates an error with ete3.Tree.show()
        return 0 if isnan(dist) else dist

    roots = []
    # Nodes already created as children and still waiting for their own row.
    awaiting = {}

    for nodename, children in roll_leafwards_indices(df,
                                                     root_value=root_value):
        if nodename in awaiting:
            node = awaiting.pop(nodename)
        else:
            node = ete3.TreeNode(name=nodename, dist=get_dist(nodename))
            roots.append(node)

        for ch in children:
            if ch in awaiting:
                logger.error('Node name already used: %r (override).', ch)

            child_node = node.add_child(name=ch, dist=get_dist(ch))
            awaiting[ch] = child_node
            logger.debug('node %r %r -> child %r dist[%r]=%s', nodename, node,
                         ch, dist_column, child_node.dist)

    return roots
    def extendparalogy(paralog_packs: list,
                       taxon: str,
                       parent_paralogy=None) -> None:
        """'Speciate' the paralogy, i.e divide the `paralog_packs` into their
        descendant paralog_packs in each child_taxon, and call itself again on
        each descendant paralogy"""
        nonlocal callnb, indent  #debug
        callnb += 1  #debug
        indent += 1  #debug

        pdbug(indent, taxon, 'call:', callnb, prefix='# ')

        # Create a node object representing the paralogy, if any. Attach to parent if any.
        para_size = len(paralog_packs)
        if para_size > 1:
            para_name = '|'.join('-'.join(ch.name for ch in pack)
                                 for pack in paralog_packs)
            current_paralogy = ete3.TreeNode(name=para_name)
            current_paralogy.add_feature("S", taxon)
            current_paralogy.add_feature("P", 1)  # It is a paralogy
            #current_paralogy.add_feature("D", ("Y" if getattr(parent_paralogy, 'S', None)==taxon else "N")) #optional
            #parent_paralogy.add_feature("D", )
        else:
            #assert len(paralog_packs[0]) == 1  # Not sure if it should happen
            current_paralogy = make_singleton_node(
                list(paralog_packs[0])[0].name, taxon)
            pdbug(indent, "No paralogy")

        if current_paralogy is not None:
            if parent_paralogy is None:
                current_paralogy.add_feature('D', 'Y')
                paralogies.append(current_paralogy)
                pdbug(indent, 'Create new paralogy', current_paralogy.name)
            else:
                if parent_paralogy.D == "Y" and all(
                        len(p) == 1 for p in paralog_packs):
                    current_paralogy.add_feature('A', 1)
                else:
                    current_paralogy.add_feature('A', 0)
                # It is a "sub-paralogy" (steming from a main one by gene dupli)
                parent_paralogy.add_child(current_paralogy)
                pdbug(indent, ("dup" if parent_paralogy.D == "Y" else "spe") +
                      '-extend paralogy from', parent_paralogy.name)
        #pdbug(indent, current_paralogy.get_tree_root() if current_paralogy else None, prefix='> ')
        pdbug(indent, 'paralog_packs:', paralog_packs)

        # The descendant paralog_pack in each species after the speciation:
        paralog_packs_after_speciation = {}  # {ch: [set()] * para_size}
        #speciated_taxa = set()

        # Empty paralogs if genes reached a speciation node.
        # Otherwise, replace the node by the duplication descendants and start a new paralogy
        seen_paralogs = set()  #debug

        for pack_i, paralog_pack in enumerate(paralog_packs):
            has_sub_paralogies = len(paralog_pack) > 1
            #pdbug(indent, pack_i, paralog_pack, prefix='* ')

            while paralog_pack:
                paralog = paralog_pack.pop(
                )  # if isinstance(paralog_pack, set) else paralog = paralog_pack

                children_taxa = [
                    get_taxon(ch, ancgene2sp, ensembl_version)
                    for ch in paralog.children
                ]
                event = infer_gene_event(paralog, taxon, set(children_taxa))

                #pdbug(indent, repr(paralog), event+':', paralog.children, prefix=' ** ')

                if event == 'dup':
                    #if para_size > 1 / current_paralogy is not None
                    pdbug(indent, 'Dup!')
                    if current_paralogy is not None:
                        current_paralogy.add_feature('D', 'Y')
                        #if current_paralogy.P:
                        #    # Only update the pack when we are in a paralogy.
                        #    # If not, the current branch can be dropped, because
                        #    # each paralog is already going to be checked in
                        #    # subsequent calls.
                    paralog_pack.update(paralog.children)
                    #if callnb == 32: import ipdb; ipdb.set_trace(context=1)
                    if not has_sub_paralogies:
                        assert len(set(
                            children_taxa)) == 1, "Missing speciation nodes"
                        child_taxon = children_taxa[0]
                        extendparalogy(
                            [set((ch, )) for ch in paralog.children],
                            children_taxa[0], current_paralogy)
                        if include_singleton_branches and not getattr(
                                parent_paralogy, 'P', 0):
                            # It miraculously worked. I don't know why.
                            # This is needed with option `include_singleton_branches=True`:
                            # it avoids drawing a "duplicate" branch for singleton genes left from a new paralogy node.
                            break

                        extendparalogy(paralog_packs, children_taxa[0],
                                       current_paralogy)
                        # Seems to work okay, but I suspect some paralogs are
                        # checked several times.
                        #break
                        has_sub_paralogies = True

                    indent -= 1  #debug
                    #also extendparalogy of the current paralogy (current_paralogy)
                    #if has_sub_paralogies:
                    #    paralog_pack.update(paralog.children)
                    #extendparalogy
                    #has_sub_paralogies = True

                else:
                    # What if it is a leaf? The paralog is just popped out.
                    for child_taxon, speciated_paralog in zip(
                            children_taxa, paralog.children):
                        speciated_paralog_packs = paralog_packs_after_speciation.setdefault(
                            child_taxon, [set() for p in range(para_size)])
                        #if child_taxon not in paralog_packs_after_speciation:
                        #    paralog_packs_after_speciation[child_taxon] = [set() for i * para_size
                        speciated_paralog_packs[pack_i].add(speciated_paralog)
                        #pdbug(indent, ' ** Add speciated paralog:', repr(speciated_paralog),
                        #      '->', pack_i, child_taxon)

                        assert speciated_paralog not in seen_paralogs  #debug
                        seen_paralogs.add(speciated_paralog)  #debug

        assert taxon not in paralog_packs_after_speciation, \
            "Intermediate speciation nodes are missing at: %s -> %s" % \
            (taxon, tuple(paralogs_after_speciation.keys()))

        #if para_size > 1 or no dup:
        #if there hasn't been a complete update of the paralog_pack
        # if there was a dup, this is redundant.
        for child_taxon, speciated_paralog_packs in paralog_packs_after_speciation.items(
        ):
            speciated_paralog_packs = [
                pack for pack in speciated_paralog_packs if pack
            ]

            redundant_nodes = (len(speciated_paralog_packs) > 1
                               and set.intersection(*speciated_paralog_packs))
            assert not redundant_nodes, paralog_packs_after_speciation

            pdbug(indent, 'Spe!')
            if current_paralogy is not None:
                current_paralogy.add_feature('D', 'N')
            extendparalogy(speciated_paralog_packs, child_taxon,
                           current_paralogy)
            indent -= 1  #debug
def build_tree(tablesoup, recurs=0, _recurs_count=0):
    """Convert a parsed "clade" HTML table into a list of ``ete3`` nodes.

    Each ``<tr>`` that is a direct child of the table's ``<tbody>`` is
    inspected through its first direct ``<td>``:

    * class ``clade-label``: starts a new node named after the cell text.
      The sibling ``clade-leaf`` cell either holds a nested ``clade`` table,
      which is converted recursively into the node's children, or a leaf
      name. The leaf name becomes the node's own name when the label was
      empty, and a child otherwise.
    * class ``clade-slabel``: its text is appended to the ``info`` feature
      of the latest node.
    * anything else is reported with a warning.

    Nodes get the features ``info`` and ``wikipedia_page_depth``, and, when
    available, ``link``, ``img``, ``imgwidth`` and ``imgheight``. A label
    cell whose style contains ``dashed`` gets ``support = 0.5``.

    Args:
        tablesoup: Parsed table element (BeautifulSoup-style API).
        recurs: Maximum number of leaf links to follow in depth. The linked
            page is fetched with ``get_wiki_tree`` and the children of the
            matching internal node are grafted onto the leaf.
        _recurs_count: Current link depth (internal).

    Returns:
        The list of nodes found at the top level of the table; empty when
        the table has no direct ``<tbody>``.
    """
    tbody = tablesoup.findChild('tbody', recursive=False)  # direct child only

    nodes = []

    if tbody is not None:
        for row in tbody.findChildren('tr', recursive=False):
            cell0 = row.findChild('td', recursive=False)
            if cell0 is None:
                # Row without any direct <td>: nothing to read here.
                continue
            tagclass = cell0.get('class', [])
            if 'clade-label' in tagclass:
                cladename = cell0.get_text().strip()
                nodes.append(ete3.TreeNode(name=cladename))
                nodes[-1].add_feature('info', [])
                nodes[-1].add_feature('wikipedia_page_depth', _recurs_count)
                if 'dashed' in cell0.get('style', ()):
                    # This branch is controversial
                    nodes[-1].support = 0.5

                cladeleaf = cell0.find_next_sibling('td', class_='clade-leaf')
                if cladeleaf is None:
                    # Keep the labelled node, but there is nothing below it.
                    logger.warning(
                        "No 'clade-leaf' cell found next to label %r.",
                        cladename)
                    continue
                child_clade = cladeleaf.findChild('table',
                                                  class_='clade',
                                                  recursive=False)
                if child_clade is not None:
                    for child_node in build_tree(child_clade, recurs,
                                                 _recurs_count):
                        nodes[-1].add_child(child=child_node)
                else:
                    leafname = cladeleaf.get_text().strip()
                    leaflink = cladeleaf.find('a')
                    leafimg = cladeleaf.find('img')
                    if not nodes[-1].name:
                        # Update the preceding node, which is actually just the leading branch.
                        nodes[-1].name = leafname
                    else:
                        nodes[-1].add_child(name=leafname)

                    if leaflink:
                        nodes[-1].add_feature('link', leaflink.get('href', ''))
                        otherlinks = [
                            l for l in leaflink.find_next_siblings('a')
                            if 'image' not in l.get('class', '')
                        ]
                        if otherlinks:
                            logger.warning("Alternative leaf links: " + \
                                    ";".join("%r class=%r" % (l.get_text(), l.get('class'))
                                             for l in otherlinks))
                    if leafimg:
                        nodes[-1].add_feature('img', leafimg['src'])
                        nodes[-1].add_feature('imgwidth', leafimg['width'])
                        nodes[-1].add_feature('imgheight', leafimg['height'])

                    # An <a> without href used to raise KeyError here; such a
                    # link cannot be followed, so it is skipped.
                    href = leaflink.get('href', '') if leaflink else ''
                    if _recurs_count < recurs and href and 'redlink=1' not in href:
                        if not href.startswith('https://'):
                            href = WIKIPEDIA_URL + href
                        hreftreesoups = get_wiki_tree(url=href)
                        if hreftreesoups:
                            logger.info("Recursing into %r from %r", leafname,
                                        tablesoup.find_parent('[document]')\
                                          .findChild('title').get_text().strip()
                                        )
                            logger.info(
                                "Found %d phylogenetic trees (at depth %d).",
                                len(hreftreesoups), _recurs_count + 1)
                            leaflinktext = leaflink.get_text().strip()

                            # Graft the children of the first matching
                            # internal node, then stop searching.
                            for hreftreesoup in hreftreesoups:
                                for hreftree in build_tree(
                                        hreftreesoup, recurs,
                                        _recurs_count + 1):
                                    matched_node = find_matching_node(
                                        hreftree, leaflinktext,
                                        *leafname.split('/'))

                                    logger.debug("Matched node: %r",
                                                 matched_node)
                                    if matched_node and not matched_node.is_leaf(
                                    ):
                                        for leafchild in matched_node.children:
                                            nodes[-1].add_child(
                                                child=leafchild)
                                        break
                                else:
                                    continue  # next hreftreesoup if no match
                                break
                            else:
                                logger.warning(
                                    "Corresponding internal node (%r/%r) not found.",
                                    leafname, leaflinktext)

            elif 'clade-slabel' in tagclass:
                sublabel = cell0.get_text().strip()
                if nodes:
                    nodes[-1].info.append(sublabel)
                else:
                    # A sub-label with no preceding node used to raise IndexError.
                    logger.warning(
                        "Sub-label %r found before any clade label: ignored.",
                        sublabel)
            else:
                logger.warning(
                    "Unexpected class of cell in the row under %r: %r",
                    (nodes[-1].name if nodes else None), tagclass)
    return nodes
def gen_test_tree():
    """Return the root of a fixed 21-node binary test tree.

    The root is named ``'root'`` with ``dist`` 0; the other nodes are named
    ``'b'`` to ``'u'``. The topology and branch lengths are those of the
    table below.
    """
    # (name, dist, parent name), listed in attachment order so that the
    # order of children under each parent is fixed.
    layout = (
        ('b', 2, 'root'), ('c', 1, 'root'),
        ('d', 2, 'b'), ('e', 2, 'b'),
        ('f', 1, 'c'), ('g', 1, 'c'),
        ('h', 1, 'd'), ('i', 1, 'd'),
        ('j', 1, 'h'), ('k', 1, 'h'),
        ('l', 1, 'e'), ('m', 2, 'e'),
        ('n', 1, 'm'), ('o', 1, 'm'),
        ('p', 1, 'f'), ('q', 2, 'f'),
        ('r', 1, 'q'), ('s', 1, 'q'),
        ('t', 1, 's'), ('u', 1, 's'),
    )

    root = ete3.TreeNode()
    root.name = 'root'
    root.dist = 0

    by_name = {'root': root}
    for label, length, parent_label in layout:
        node = ete3.TreeNode()
        node.name = label
        node.dist = length
        by_name[parent_label].add_child(node)
        by_name[label] = node

    return root
Example #15
0
def duplicateOrthoTree(trees1, trees2, distDuplication=0.005) :
    """Join paired trees by grafting each leaf of the second onto the first.

    ``alignTrees([trees1, trees2])`` is expected to yield groups of records
    ``[w, t1, t2]`` (NOTE(review): structure inferred from the unpacking
    below - confirm against ``alignTrees``). Every record is rewritten in
    place as ``[w, merged_tree]``.

    One "duplication event" is drawn per leaf name of ``result[1][0][1]``:
    a random order of the leaves and an exponentially distributed distance
    with scale ``distDuplication``. Leaf names are split on the first ``'_'``
    and the prefix is used to pair a leaf of ``t1`` with a leaf of ``t2``.

    Returns:
        The (mutated) value returned by ``alignTrees``.
    """
    result = alignTrees([trees1, trees2])
    # Shuffle the leaf names of a reference tree; one event per leaf.
    tips = np.random.permutation(result[1][0][1].get_leaf_names())
    dists = np.random.exponential(distDuplication, size=len(tips))
    # Each event is [name prefix, full leaf name, duplication distance].
    events = [[t.split('_', 1)[0], t, d] for t, d in zip(tips.tolist(), dists)]
    for res in result :
        for r in res :
            w, t1, t2 = r
            # Work on copies so the input trees are left untouched.
            tr, ts = t1.copy(), t2.copy()
            # Leaves of tr grouped by integer prefix. NOTE(review): n1 is not
            # read again in this function; int() raises on non-numeric prefixes.
            n1 = {}
            for n in tr.get_leaves() :
                o = int(n.name.split('_', 1)[0])
                if o not in n1 :
                    n1[o] = [n]
                else :
                    n1[o].append(n)
            # Prefix (as a string) -> full leaf name in ts.
            n2 = { n.split('_', 1)[0]:n for n in ts.get_leaf_names() }
            for o, cn1, dist in events :
                # NOTE(review): raises KeyError if ts has no leaf with prefix o.
                c2 = ts.get_leaves_by_name(n2[o])
                if not c2 :
                    continue
                c2 = c2[0]
                c1 = tr.get_leaves_by_name(cn1)[0]
                # Walk rootward from each leaf, spending `dist` along the
                # branches, to find the branch where the graft happens and
                # the distance left over on it.
                d1 = dist
                while d1 - c1.dist > 0 and c1.up :
                    d1, c1 = d1 - c1.dist, c1.up
                d2 = dist
                while d2 - c2.dist > 0 and c2.up :
                    d2, c2 = d2 - c2.dist, c2.up
                p, l = c1.up, c1.dist
                
                # Insert a new node n on the branch above c1, at distance d1
                # from c1.
                n = ete3.TreeNode()
                if p is not None :
                    p.remove_child(c1)
                n.add_child(c1)
                c1.up, c1.dist = n, d1
                
                if p is not None :
                    p.add_child(n)
                    # n takes what remains of the original branch length.
                    n.dist = l - c1.dist
                else :
                    # c1 was the root: n becomes the new root of tr.
                    tr = n
                    # NOTE(review): the lookups below discard their result;
                    # they only raise IndexError if an event leaf is missing.
                    for _, x, _ in events :
                        tr.get_leaves_by_name(x)[0]
                # Move c2 (with its subtree) out of ts and under n.
                p2 = c2.up
                if p2 is not None :
                    p2.remove_child(c2)
                n.add_child(c2)
                c2.up, c2.dist = n, d2
                if p2 is not None :
                    gp = p2.up
                    if gp :
                        # Dissolve p2: its remaining children are re-attached
                        # to the grandparent, with p2's branch length added.
                        gp.remove_child(p2)
                        for c in p2.get_children() :
                            p2.remove_child(c)
                            gp.add_child(c)
                            c.up, c.dist = gp, c.dist + p2.dist
                    elif len(p2.get_children()) == 1 :
                        # p2 was the root with a single child left: that
                        # child becomes the new root of ts.
                        ts = p2.get_children()[0]
                        ts.up = None
                else :
                    # c2 had no parent, i.e. the whole of ts was grafted:
                    # stop processing events for this record.
                    break
            # Replace the record in place; t2 is dropped.
            r[:] = [w, tr]
            
    return result
Example #16
0
def create_nj_tree(node_names, distance_matrix):
    """Build a tree with the neighbor-joining algorithm and return its root.

    Algorithm from "Biological sequence analysis: Probabilistic models of
    proteins and nucleic acids" by Durbin, Eddy, Krogh and Mitchinson (1998).
    Nodes are ``ete3.TreeNode`` instances so the tree is easy to visualize.

    Args:
        node_names: Names of the leaves, in the same order as the rows of
            ``distance_matrix``.
        distance_matrix: Pairwise distances; copied with ``.copy()`` before
            use.

    Returns:
        A new root node with ``dist`` 0 whose two children are the last two
        remaining clusters, each placed at half of their mutual distance.
    """
    active = create_nodes(node_names).copy()
    dist_mat = distance_matrix.copy()

    # Join clusters until only two are left.
    while len(active) > 2:
        # "Pick a pair i, j in L for which D(i,j) is minimal"
        d_matrix = create_D_matrix(dist_mat)
        best_value = sys.float_info.max
        best_pair = (0, 0)
        size = len(dist_mat)
        for row in range(size):
            for col in range(size):
                if d_matrix[row][col] < best_value:
                    best_value = d_matrix[row][col]
                    best_pair = (row, col)
        i, j = best_pair

        # "Define a new node k and set d(k,m) = 1/2 * (d(i,m) + d(j,m) - d(i,j)),
        # for all m in L except i, j": drop rows/columns i and j, then add k.
        reduced = remove_row_col(dist_mat, i, j)
        merged_dists = create_combined_dist_list(dist_mat, i, j)
        reduced = add_row_col(reduced, merged_dists)

        # k is the parent of i and j.
        k = ete3.TreeNode()
        dist_i_k = get_edge_length(dist_mat, i, j)
        dist_j_k = dist_mat[i][j] - dist_i_k
        k.add_child(child=active[i], dist=dist_i_k)
        k.add_child(child=active[j], dist=dist_j_k)

        # Replace i and j by k. Deleting i first shifts j down by one when j
        # comes after it.
        del active[i]
        del active[j if i > j else j - 1]
        active.append(k)

        dist_mat = reduced

    # Two clusters remain: hang both under a fresh root.
    root = ete3.TreeNode(dist=0)
    root.add_child(child=active[0], dist=dist_mat[0][1] / 2)
    root.add_child(child=active[1], dist=dist_mat[0][1] / 2)
    return root
Example #17
0
 def visualize(self):
     """Return a single childless ete3 node named after ``self.decision``."""
     leaf_node = ete3.TreeNode(name=self.decision)
     return leaf_node
 def make_singleton_node(nodename, taxon):
     """Create a node named ``nodename`` with features ``S=taxon`` and ``P=1``."""
     singleton = ete3.TreeNode(name=nodename)
     # P: whether it is a paralogy branch?
     singleton.add_features(S=taxon, P=1)
     return singleton