예제 #1
0
def sankoff(tree, possible_labels=("LL", "RE", "RW", "M1", "M2", "Liv")):
    """Build and fill the Sankoff dynamic-programming cost matrix for ``tree``.

    The root is the first node found with in-degree 0. The tree is passed
    through ``cmp.set_depth`` and ``cmp.extend_dummy_branches`` before the
    matrix is allocated, so the rows index the nodes of the tree returned by
    those helpers.

    Args:
        tree: Directed graph (networkx-style API). Every node with
            out-degree 0 must carry a ``"label"`` attribute whose value is in
            ``possible_labels``.
        possible_labels: Ordered label alphabet; the label at position ``j``
            corresponds to column ``j`` of the returned matrix.

    Returns:
        The ``(number of nodes, number of labels)`` array returned by
        ``sankoff_fill_C``. Row ``i`` belongs to the ``i``-th node in
        breadth-first order from the root. Before filling, every entry is
        ``inf`` except a 0 at each leaf's observed label.

    Raises:
        IndexError: If no node has in-degree 0.
        KeyError: If a leaf lacks a ``"label"``, its label is not in
            ``possible_labels``, or it is not reachable from the root.
    """
    root = [n for n in tree if tree.in_degree(n) == 0][0]

    tree = cmp.set_depth(tree, root)
    max_depth = cmp.get_max_depth(tree, root)
    tree = cmp.extend_dummy_branches(tree, max_depth)

    C = np.full((len(tree.nodes), len(possible_labels)), np.inf)

    # Maps between node/label names and the row/column indices of C.
    # Rows follow breadth-first discovery order starting at the root.
    bfs_order = [root] + [child for _, child in nx.bfs_edges(tree, root)]
    node_to_i = {node: i for i, node in enumerate(bfs_order)}
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    # Initialise the leaves: the observed label costs nothing, every other
    # label keeps its infinite cost.
    for leaf in (n for n in tree.nodes if tree.out_degree(n) == 0):
        label = tree.nodes[leaf]["label"]
        C[node_to_i[leaf], label_to_j[label]] = 0

    return sankoff_fill_C(tree, root, C, node_to_i, label_to_j)
예제 #2
0
def sankoff_fill_C(tree, root, C, node_to_i, label_to_j):
    """Fill the Sankoff cost matrix ``C`` bottom-up and return it.

    For every processed node and every state ``s`` the entry becomes the sum,
    over the node's children, of
    ``min_sp(SANKOFF_SIGMA(s, sp) + C[child, sp])``.

    Args:
        tree: Directed graph (networkx-style API) rooted at ``root``.
        root: Root node, used to obtain the maximum depth from ``cmp``.
        C: 2-D cost array with one row per node and one column per state.
            It is modified in place.
        node_to_i: Mapping from node to its row index in ``C``.
        label_to_j: Mapping from label to column index. Not used here; kept
            for interface compatibility.

    Returns:
        The same array ``C`` after filling.
    """
    md = cmp.get_max_depth(tree, root)
    n_states = C.shape[1]

    # Walk the levels from just above the deepest one up to the root.
    # NOTE(review): cmp.cut_tree(tree, d) is assumed to yield the nodes at
    # depth d -- confirm against the cmp module.
    d = md - 1
    while d >= 0:

        for int_node in cmp.cut_tree(tree, d):
            children = list(tree.successors(int_node))

            if not children:
                # A childless node keeps whatever costs it was initialised
                # with; summing over zero children would overwrite its row
                # with zeros.
                continue

            row = node_to_i[int_node]
            for s in range(n_states):
                opt_cost = 0
                for child in children:
                    i = node_to_i[child]
                    # Cheapest way to give this child any state sp when the
                    # parent is in state s.
                    opt_cost += np.min(
                        [SANKOFF_SIGMA(s, sp) + C[i, sp] for sp in range(n_states)]
                    )

                C[row, s] = opt_cost

        d -= 1

    return C
예제 #3
0
def fitch_hartigan_top_down(tree, root):
    """Run Hartigan's top-down refinement, selecting one labeling.

    The root receives a random element of its ``"S1"`` set. Each node visited
    afterwards inherits its parent's label when that label is in the node's
    own ``"S1"``; otherwise it receives a random element of its ``"S1"``.

    The ``"S2"`` sets are not consulted: the original code drew from ``"S1"``
    both when the parent's label was in ``"S2"`` and when it was not, so the
    two cases are merged. This also means nodes carrying only ``"S1"`` no
    longer fail on a missing ``"S2"`` key.

    Args:
        tree: Directed graph (networkx-style API) whose nodes carry an
            ``"S1"`` attribute. It is modified in place.
        root: Root node of the tree.

    Returns:
        The same ``tree`` with a ``"label"`` attribute set on the root and on
        every node visited.

    Raises:
        IndexError: If a visited node has no predecessor.
    """
    md = cmp.get_max_depth(tree, root)

    # Phase 2: top-down assignment, starting from a random optimal root state.
    tree.nodes[root]["label"] = np.random.choice(tree.nodes[root]["S1"])

    # NOTE(review): cmp.cut_tree(tree, d) is assumed to yield the nodes at
    # depth d, so parents are labeled before their children -- confirm.
    d = 1
    while d <= md:

        for node in list(cmp.cut_tree(tree, d)):
            parent = list(tree.predecessors(node))[0]
            parent_label = tree.nodes[parent]["label"]
            candidates = tree.nodes[node]["S1"]

            if parent_label in candidates:
                # Keeping the parent's state adds no change on this edge.
                tree.nodes[node]["label"] = parent_label
            else:
                tree.nodes[node]["label"] = np.random.choice(candidates)

        d += 1

    return tree
예제 #4
0
def sample_sankoff_path(tree, C, possible_labels=("LL", "RE", "RW", "M1", "M2", "Liv")):
    """Sample one minimum-cost labeling of ``tree`` from a filled cost matrix.

    The root gets a uniformly random state among those with the smallest
    root cost. Then, level by level from the root downward, each child gets a
    uniformly random state among those minimising
    ``SANKOFF_SIGMA(parent_state, sp) + C[child, sp]``.

    Args:
        tree: Directed graph (networkx-style API). Its ``"label"`` node
            attribute is overwritten in place for every assigned node.
        C: 2-D cost array with one row per node and one column per label.
        possible_labels: Ordered label alphabet; column ``j`` of ``C``
            corresponds to ``possible_labels[j]``.

    Returns:
        A tuple ``(tree, pars)`` where ``pars`` is the minimum cost in the
        root's row of ``C``.

    Raises:
        IndexError: If no node has in-degree 0.
    """
    def choose_assignment(vals):
        return np.random.choice(vals)

    root = [n for n in tree if tree.in_degree(n) == 0][0]

    # Row indices are rebuilt here as breadth-first order from the root.
    # NOTE(review): this must match the node ordering used when C was built
    # (sankoff builds it on the tree returned by cmp.extend_dummy_branches)
    # -- confirm callers pass that same tree.
    bfs_order = [root] + [child for _, child in nx.bfs_edges(tree, root)]
    node_to_i = {node: i for i, node in enumerate(bfs_order)}

    # Choose a minimal-cost state for the root based on C.
    root_costs = C[node_to_i[root], :]
    pars = np.amin(root_costs)
    assignments = {root: choose_assignment(np.where(root_costs == pars)[0])}

    md = cmp.get_max_depth(tree, root)
    n_states = C.shape[1]

    # Top-down pass: every node passes its chosen state on to its children.
    d = 0
    while d <= md:

        for int_node in cmp.cut_tree(tree, d):
            s = assignments[int_node]

            for c in tree.successors(int_node):
                c_i = node_to_i[c]
                # Explicit array so the tie search below is an element-wise
                # comparison rather than a list-versus-scalar one.
                value_arr = np.array(
                    [SANKOFF_SIGMA(s, sp) + C[c_i, sp] for sp in range(n_states)]
                )
                m = np.amin(value_arr)
                assignments[c] = choose_assignment(np.where(value_arr == m)[0])

        d += 1

    # Convert column indices back to the actual labels.
    labels = {node: possible_labels[j] for node, j in assignments.items()}

    nx.set_node_attributes(tree, labels, "label")
    return tree, pars
예제 #5
0
def fitch_hartigan_bottom_up(tree, root, S):
    """Run the bottom-up phase of Hartigan's algorithm on ``tree``.

    Levels are processed from the maximum depth up to depth 0. For each
    visited node:

    * no children: the node must already carry ``"S1"``;
    * one child: ``"S1"`` is taken from the child and ``"S2"`` is empty;
    * several children: ``"S1"`` holds the labels of ``S`` that occur most
      often in the children's ``"S1"`` sets, and ``"S2"`` holds the labels
      that occur exactly one time fewer.

    Args:
        tree: Directed graph (networkx-style API). It is modified in place.
        root: Root node, used to obtain the maximum depth from ``cmp``.
        S: Alphabet of candidate labels. Any sequence is accepted; it is
            converted to a NumPy array for indexing.

    Returns:
        The same ``tree`` with ``"S1"`` / ``"S2"`` set on the processed nodes.

    Raises:
        Exception: If a childless node has no ``"S1"`` attribute.
    """
    # Mask indexing below needs an ndarray; an existing array passes through
    # unchanged.
    S = np.asarray(S)

    md = cmp.get_max_depth(tree, root)

    # NOTE(review): cmp.cut_tree(tree, d) is assumed to yield the nodes at
    # depth d, so children are processed before their parents -- confirm.
    d = md
    while d >= 0:

        for i in cmp.cut_tree(tree, d):
            children = list(tree.successors(i))

            if len(children) == 0:
                if "S1" not in tree.nodes[i]:
                    raise Exception("This should have a label!")
                continue

            if len(children) == 1:
                # A single child passes its candidate set straight up.
                tree.nodes[i]["S1"] = tree.nodes[children[0]]["S1"]
                tree.nodes[i]['S2'] = []
                continue

            all_labels = np.concatenate([tree.nodes[c]['S1'] for c in children])

            # Occurrences of each label of S across the children's S1 sets.
            freqs = np.array([np.count_nonzero(all_labels == k) for k in S])
            top = freqs.max()

            tree.nodes[i]['S1'] = S[freqs == top]
            tree.nodes[i]['S2'] = S[freqs == top - 1]

        d -= 1

    return tree