Example #1
0
def swap_tree(tree):
    """
    Swap two randomly chosen, non-nested nodes of a Newick tree.

    Parameters
    ----------
    tree : str
        Tree in Newick format. If the string contains no double quote it is
        first passed through ``nwk.safe_newick_string`` (presumably this
        quotes the labels so that the plain string replacement below cannot
        hit partial names -- confirm against ``nwk``).

    Returns
    -------
    str
        The tree after the swap, passed through ``nwk.sort_tree`` and with
        all double quotes removed. If no swappable pair exists (fewer than
        two candidate nodes, or every other candidate contains or is
        contained in the first one), the tree is returned without a swap but
        normalized in the same way.
    """
    # make safe tree
    if '"' not in tree:
        tree = nwk.safe_newick_string(tree)

    # candidate nodes in random order; the first node reported by
    # ``nwk.nodes_in_tree`` is skipped (presumably the whole tree, which
    # cannot be swapped -- confirm against ``nwk``)
    nodes = list(nwk.nodes_in_tree(tree))[1:]
    random.shuffle(nodes)

    # nothing to swap: previously this raised IndexError / NameError
    if len(nodes) < 2:
        return nwk.sort_tree(tree).replace('"', '')

    node_a = nodes[0]

    # find a partner that neither contains node_a nor is contained in it,
    # since swapping a subtree with one of its own parts is meaningless
    for candidate in nodes[1:]:
        if candidate not in node_a and node_a not in candidate:
            node_b = candidate
            break
    else:
        # every candidate is nested with node_a; previously the last
        # (nested) candidate was swapped anyway and corrupted the tree
        return nwk.sort_tree(tree).replace('"', '')

    # replace both nodes by placeholders first, so that inserting node_b
    # cannot be picked up again by the replacement of node_a; a node is only
    # matched when directly followed by ',' or ')'
    for node, dummy in ((node_a, '#dummyA#'), (node_b, '#dummyB#')):
        for closer in (',', ')'):
            tree = tree.replace(node + closer, dummy + closer)

    tree = tree.replace('#dummyA#', node_b)
    tree = tree.replace('#dummyB#', node_a)

    return nwk.sort_tree(tree).replace('"', '')
Example #2
0
def all_rooted_binary_trees(*taxa):
    """
    Compute all rooted trees.

    Parameters
    ----------
    *taxa : str
        The names of the taxa for which the trees are generated.

    Yields
    ------
    str
        A tree in Newick format. For two or fewer taxa, the single tree
        ``'(' + ','.join(taxa) + ');'`` is yielded. For more taxa, every
        complete tree is yielded as soon as its last taxon was inserted;
        these strings carry no trailing ``';'``.

    Notes
    -----

    This procedure yields all rooted binary trees for a given set of taxa, as
    described in :bib:`Felsenstein1978`. It implements a depth-first search.
    The order in which the trees are produced is randomized via
    ``random.shuffle``.
    """
    if len(taxa) <= 2:
        yield '(' + ','.join(taxa) + ');'
        return

    # stack of (partial tree, taxa still to be inserted); taking items from
    # the end makes the traversal depth-first
    stack = [('(' + ','.join(taxa[:2]) + ')', list(taxa[2:]))]

    while stack:
        # only incomplete trees are ever pushed, so ``rest`` is non-empty
        tree, rest = stack.pop()
        next_taxon = rest.pop()

        nodes = list(nwk.nodes_in_tree(tree))
        random.shuffle(nodes)
        for node in nodes:
            # attach the new taxon as sister of ``node``; note that this is
            # a plain substring replacement, so a node string occurring more
            # than once in ``tree`` is wrapped at every occurrence
            new_tree = tree.replace(node, '(' + next_taxon + ',' + node + ')')

            if rest:
                # each branch gets its own, independently ordered copy
                remaining = list(rest)
                random.shuffle(remaining)
                stack.append((new_tree, remaining))
            else:
                yield new_tree
Example #3
0
def branch_and_bound(
        taxa, 
        patterns,
        transitions,
        characters,
        guide_tree = False,
        verbose = True,
        lower_bound = False,
        sample_steps = 100,
        ):
    """
    Try to make a branch and bound parsimony calculation.

    Parameters
    ----------
    taxa : sequence of str
        The taxon names. Lists and tuples are both accepted; the sequence is
        not modified.
    patterns, transitions, characters : sequence
        Parallel sequences; they are iterated together and each triple is
        handed to ``sankoff_parsimony_up`` to score a tree.
    guide_tree : str or False
        If given (and no ``lower_bound`` is passed), the initial bound is the
        summed parsimony weight of this tree.
    verbose : bool
        If True, print the initial bound and progress messages.
    lower_bound : number or False
        Initial pruning threshold. If falsy, it is derived from
        ``guide_tree`` or, without a guide tree, taken as twice the value of
        ``mst_weight`` for all taxa.
    sample_steps : int
        Print a progress message each time the number of completely
        evaluated trees reaches a multiple of this value. A value of 0
        disables the progress messages.

    Returns
    -------
    tuple
        ``(trees, lower_bound)``: the list of best trees found (each passed
        through ``nwk.sort_tree``) and their score.

    Notes
    -----
    Despite its name, ``lower_bound`` is used as the threshold above which
    candidates are discarded, and it is lowered whenever a complete tree
    with a smaller score is found.

    NOTE(review): for two or fewer taxa only the Newick string
    ``'(' + ','.join(taxa) + ');'`` is returned instead of a tuple; this is
    kept for backward compatibility.
    """
    
    # calculate the lower bound
    if lower_bound:
        pass
    elif not guide_tree:
        lower_bound = mst_weight(taxa, patterns, transitions, characters) * 2
        if verbose:
            print("[i] Lower Bound (estimated):", lower_bound)
    else:
        lower_bound = 0
        ltree = nwk.LingPyTree(guide_tree)
        for p, t, c in zip(patterns, transitions, characters):
            lower_bound += sankoff_parsimony_up(
                    p,
                    taxa,
                    ltree,
                    t,
                    c,
                    weight_only=True
                )
        if verbose:
            print("[i] Lower Bound (in guide tree):", lower_bound)

    # we start doing the same as in the case of the calculation of all rooted
    # trees below
    if len(taxa) <= 2:
        return '('+','.join(taxa)+');'

    # make queue with taxa included and taxa to be visited; the copy into a
    # list allows tuples as input and keeps ``taxa`` itself untouched
    queue = [('('+','.join(taxa[:2])+')', list(taxa[2:]), lower_bound)]

    trees = []
    checked = 0    # number of complete trees evaluated so far
    previous = 0   # value of ``checked`` at the last progress message
    mst_cache = {} # results of ``mst_weight``, keyed by the remaining taxa

    while queue:
        # prefer partial trees with the fewest taxa left, then the lowest
        # score; the sort is stable, so ties keep their insertion order
        queue.sort(key=lambda x: (len(x[1]), x[2]))

        # only incomplete trees are ever queued, so ``rest`` is non-empty
        tree, rest, _ = queue.pop(0)

        # add next taxon
        next_taxon = rest.pop()

        # the estimate for the taxa still missing is the same for every
        # insertion position, so it is looked up once per partial tree
        if rest:
            key = tuple(rest)
            if key not in mst_cache:
                mst_cache[key] = mst_weight(
                        rest, patterns, transitions, characters)
            mst = mst_cache[key]

        nodes = list(nwk.nodes_in_tree(tree))
        random.shuffle(nodes)
        for node in nodes:
            new_tree = tree.replace(node, '('+next_taxon+','+node+')')

            # parsimony evaluation and lower bound comes here
            score = 0
            ltree = nwk.LingPyTree(new_tree)
            for p, t, c in zip(patterns, transitions, characters):
                weight, _ = sankoff_parsimony_up(
                        p,
                        taxa,
                        ltree,
                        t,
                        c,
                        weight_and_chars=True
                        )
                score += weight

            if rest:
                score += mst
            else:
                checked += 1

            if score <= lower_bound:
                if rest:
                    # each branch gets its own, independently ordered copy
                    remaining = list(rest)
                    random.shuffle(remaining)
                    queue.append((new_tree, remaining, score))
                elif score < lower_bound:
                    # strictly better complete tree: restart the result list
                    # and drop queued candidates that can no longer compete
                    lower_bound = score
                    trees = [nwk.sort_tree(new_tree)]
                    queue = [q for q in queue if q[2] <= lower_bound]
                else:
                    trees.append(nwk.sort_tree(new_tree))

            if (verbose and sample_steps
                    and checked % sample_steps == 0 and checked > previous):
                print("[i] Checked {0} trees so far, current lower bound is {1} with {2} best trees.".format(
                    checked,
                    lower_bound,
                    len(trees)
                    ))
                previous = checked

    return trees, lower_bound