예제 #1
0
def sankoff(tree, possible_labels=("LL", "RE", "RW", "M1", "M2", "Liv")):
    """
    Initialise the Sankoff dynamic programming matrix for ``tree`` and fill it.

    The root is the first node with in-degree 0. Node depths are set with
    ``cmp.set_depth`` and the tree is passed through
    ``cmp.extend_dummy_branches`` (using the maximum depth) before the matrix
    is built, so rows refer to the nodes of the *extended* tree.

    Parameters
    ----------
    tree : networkx-style directed graph
        Every leaf of the extended tree (out-degree 0) must have a ``"label"``
        node attribute that is a member of ``possible_labels``; otherwise a
        ``KeyError`` is raised.
    possible_labels : sequence
        Labels, in column order. The default is an immutable tuple so that it
        cannot be accidentally mutated and shared between calls.

    Returns
    -------
    Whatever ``sankoff_fill_C`` returns for the initialised matrix
    (presumably the filled node-by-label cost matrix -- confirm against
    ``sankoff_fill_C``).

    Raises
    ------
    IndexError
        If ``tree`` has no node with in-degree 0.
    """
    root = [n for n in tree if tree.in_degree(n) == 0][0]

    tree = cmp.set_depth(tree, root)
    max_depth = cmp.get_max_depth(tree, root)
    tree = cmp.extend_dummy_branches(tree, max_depth)

    # one row per node, one column per label; np.inf marks "not yet set"
    C = np.full((len(tree.nodes), len(possible_labels)), np.inf)

    # create maps between names and row/col of the dynamic programming array, C.
    # Rows follow breadth-first order starting at the root (root is row 0).
    bfs_order = [root]
    bfs_order.extend(child for _, child in nx.bfs_edges(tree, root))

    node_to_i = {node: i for i, node in enumerate(bfs_order)}
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    # instantiate the dynamic programming matrix at leaves: the observed
    # label of a leaf gets cost 0, every other label stays at infinity
    for leaf in (n for n in tree.nodes if tree.out_degree(n) == 0):
        C[node_to_i[leaf], label_to_j[tree.nodes[leaf]["label"]]] = 0

    return sankoff_fill_C(tree, root, C, node_to_i, label_to_j)
예제 #2
0
def compute_transitions_majority_vote(t, meta):
    """
    Count parent -> child label transitions after majority-vote labelling.

    The tree is labelled from ``meta`` via ``small_parsimony.assign_labels``,
    depths are set, and ``assign_majority_vote`` is applied from the root
    (the first node with in-degree 0). Every edge reachable from the root
    then adds one to the cell (parent label, child label).

    Parameters
    ----------
    t : networkx-style directed graph
    meta : object exposing ``unique()`` (presumably a pandas Series of labels)

    Returns
    -------
    pandas.DataFrame
        Square count matrix indexed by ``meta.unique()`` on both axes;
        rows are parent labels, columns are child labels.
    """
    possible_labels = meta.unique()
    n_labels = len(possible_labels)

    counts = np.zeros((n_labels, n_labels))
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    root = [n for n in t if t.in_degree(n) == 0][0]
    t = small_parsimony.assign_labels(t, meta)
    t = cmp.set_depth(t, root)
    t = assign_majority_vote(t, root)

    # now count transitions: one increment per (parent, child) edge
    for parent in nx.dfs_postorder_nodes(t, source=root):
        row = label_to_j[t.nodes[parent]['label']]
        for child in t.successors(parent):
            col = label_to_j[t.nodes[child]['label']]
            counts[row, col] += 1

    return pd.DataFrame(counts, index=possible_labels, columns=possible_labels)
예제 #3
0
def fitch_count(t, meta):
    """
    Build a label-by-label matrix from the root slice of ``small_parsimony._C``.

    The tree is labelled from ``meta``, depths are set, and the
    Fitch-Hartigan bottom-up pass is run from the root (the first node with
    in-degree 0). Nodes are indexed in breadth-first order from the root and
    labels in the order of ``meta.unique()``; those maps are passed to
    ``small_parsimony._N`` and ``small_parsimony._C``.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed by the unique labels on both axes. Cell
        ``(s1, s2)`` is the sum over the second axis of ``C`` at the root
        row for label indices ``(s1, s2)``; transitions are rows -> columns.
    """
    root = [n for n in t if t.in_degree(n) == 0][0]

    t = small_parsimony.assign_labels(t, meta)
    possible_labels = meta.unique()

    t = cmp.set_depth(t, root)
    t = small_parsimony.fitch_hartigan_bottom_up(t, root, possible_labels)

    # breadth-first node order, root first
    bfs_order = [root]
    bfs_order.extend(child for _, child in nx.bfs_edges(t, root))

    node_to_i = {node: i for i, node in enumerate(bfs_order)}
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    L = small_parsimony._N(t, possible_labels, node_to_i, label_to_j)
    C = small_parsimony._C(t, L, possible_labels, node_to_i, label_to_j)

    n_labels = L.shape[1]
    M = pd.DataFrame(
        np.zeros((n_labels, n_labels)),
        index=possible_labels,
        columns=possible_labels,
    )

    # count_mat: transitions are rows -> columns
    root_i = node_to_i[root]
    for src in possible_labels:
        src_j = label_to_j[src]
        for dst in possible_labels:
            M.loc[src, dst] = np.sum(C[root_i, :, src_j, label_to_j[dst]])

    return M
예제 #4
0
def naive_fitch(t, meta):
    """
    Return the matrix from ``small_parsimony.draw_one_solution`` as a
    DataFrame labelled by the unique values of ``meta``.

    The tree is labelled from ``meta`` and depths are set from the root
    (the first node with in-degree 0) before the solution is drawn.

    Returns
    -------
    pandas.DataFrame
        Frame whose rows and columns are both named by ``meta.unique()``.
    """
    root = [n for n in t if t.in_degree(n) == 0][0]

    t = small_parsimony.assign_labels(t, meta)
    possible_labels = meta.unique()
    t = cmp.set_depth(t, root)

    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    solution = small_parsimony.draw_one_solution(t, possible_labels, label_to_j)

    # relabel after construction so the helper's output is renamed, not reindexed
    frame = pd.DataFrame(solution)
    frame.columns = possible_labels
    frame.index = possible_labels
    return frame
예제 #5
0
def fitch_hartigan(tree):
    """
    Label the internal nodes of ``tree`` with Hartigan's algorithm (the
    extension of Fitch's algorithm to non-binary trees), given leaf labels.

    Each leaf (out-degree 0) must carry an ``'S1'`` node attribute; the
    candidate label set is the union of those attributes over all leaves.
    The bottom-up pass is followed by the top-down pass, both starting at
    the root (the first node with in-degree 0).

    Returns the tree produced by the top-down pass.
    """
    root = [n for n in tree if tree.in_degree(n) == 0][0]

    # form candidate set of labels for each internal node
    leaf_sets = [tree.nodes[n]['S1'] for n in tree if tree.out_degree(n) == 0]
    S = np.unique(np.concatenate(leaf_sets))

    tree = cmp.set_depth(tree, root)
    tree = fitch_hartigan_bottom_up(tree, root, S)
    return fitch_hartigan_top_down(tree, root)
예제 #6
0
def compute_transitions_naive(t, meta):
    """
    Return the matrix from ``fitch_parsimony.draw_one_fitch_solution`` as a
    DataFrame labelled by the unique values of ``meta``.

    The tree is labelled from ``meta``, depths are set, the Fitch bottom-up
    pass is run from the root (the first node with in-degree 0), and the
    tree is reconciled with ``fitch_parsimony.reconcile_fitch`` before the
    solution is drawn.

    Returns
    -------
    pandas.DataFrame
        Frame whose rows and columns are both named by ``meta.unique()``.
    """
    root = [n for n in t if t.in_degree(n) == 0][0]

    t = assign_labels(t, meta)
    possible_labels = meta.unique()
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    t = cmp.set_depth(t, root)
    t = fitch_parsimony.fitch_bottom_up(t, root)
    t = fitch_parsimony.reconcile_fitch(t)

    drawn = fitch_parsimony.draw_one_fitch_solution(t, possible_labels, label_to_j)

    # relabel after construction so the helper's output is renamed, not reindexed
    count_mat = pd.DataFrame(drawn)
    count_mat.columns = possible_labels
    count_mat.index = possible_labels
    return count_mat
예제 #7
0
def compute_transitions(t, meta, count_unique=False):
    """
    Build a label-by-label matrix from the root slice of
    ``fitch_parsimony.count_num_transitions``.

    The tree is labelled from ``meta``, depths are set, and the Fitch
    bottom-up pass is run from the root (the first node with in-degree 0).
    Nodes are indexed in breadth-first order from the root and labels in the
    order of ``meta.unique()``. After ``fitch_parsimony.reconcile_fitch``,
    ``count_opt_solutions`` and ``count_num_transitions`` are evaluated with
    those index maps.

    Parameters
    ----------
    t : networkx-style directed graph
    meta : object exposing ``unique()`` (presumably a pandas Series of labels)
    count_unique : bool, optional
        Forwarded unchanged to ``fitch_parsimony.count_num_transitions``.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed by the unique labels on both axes. Cell
        ``(s1, s2)`` is the sum over the second axis of ``C`` at the root
        row for label indices ``(s1, s2)``; transitions are rows -> columns.

    Raises
    ------
    IndexError
        If ``t`` has no node with in-degree 0.
    """
    root = [n for n in t if t.in_degree(n) == 0][0]
    t = assign_labels(t, meta)
    possible_labels = meta.unique()

    t = cmp.set_depth(t, root)
    t = fitch_parsimony.fitch_bottom_up(t, root)

    # breadth-first node order, root first; fixed before reconciliation
    bfs_order = [root]
    bfs_order.extend(child for _, child in nx.bfs_edges(t, root))

    node_to_i = {node: i for i, node in enumerate(bfs_order)}
    label_to_j = {label: j for j, label in enumerate(possible_labels)}

    t = fitch_parsimony.reconcile_fitch(t)

    L = fitch_parsimony.count_opt_solutions(t, possible_labels, node_to_i, label_to_j)
    C = fitch_parsimony.count_num_transitions(
        t, L, possible_labels, node_to_i, label_to_j, count_unique=count_unique
    )

    n_labels = L.shape[1]
    count_mat = pd.DataFrame(
        np.zeros((n_labels, n_labels)),
        index=possible_labels,
        columns=possible_labels,
    )

    # count_mat: transitions are rows -> columns
    root_i = node_to_i[root]
    for s1 in possible_labels:
        for s2 in possible_labels:
            count_mat.loc[s1, s2] = np.sum(C[root_i, :, label_to_j[s1], label_to_j[s2]])

    return count_mat