def tree_structure_to_Rtree_structure(leaf_nodes, edge_lengths, root_name):
    """Convert a graph of node objects into the dictionary tree format.

    Starting from the leaves, nodes are visited bottom-up. For every visited
    node an entry ``[parent, second_parent, None, length, second_length]`` is
    stored under ``node.name``:

    * a parent whose edge length is not ``None`` goes in slot 0, with
      ``float(length)`` in slot 3;
    * a parent whose edge length is ``None`` goes in slot 1, with length 0 in
      slot 4.

    Slot 2 is never filled in here.

    Parameters:
        leaf_nodes: iterable of node objects exposing ``name``,
            ``get_parents()`` and, on their parents, ``is_admixture()``.
            The iterable is copied; the caller's object is left untouched.
        edge_lengths: mapping from ``(parent_name, child_name)`` to a length
            (or ``None``).
        root_name: name of the root node; it is passed to ``rename_rootname``
            to be replaced by ``'r'``.

    Returns:
        Whatever ``insert_children_in_tree`` returns for the renamed tree.

    Raises:
        KeyError: if an edge is missing from ``edge_lengths``.
        IndexError: if no lineage is ready while parents are still waiting.
    """
    # Copy the input: the traversal below pops from and appends to this list,
    # and the caller's list must not be consumed as a side effect.
    ready_lineages = list(leaf_nodes)
    # Names of parents seen from one child so far. A name is only added when
    # it is absent, so a set is equivalent to the list used previously.
    waiting_lineages = set()
    res_tree = {}

    # The last remaining ready lineage is never visited, so it gets no entry
    # of its own.
    while len(ready_lineages) > 1 or waiting_lineages:
        node = ready_lineages.pop()
        added_node = [None] * 5
        for parent in node.get_parents():
            d = edge_lengths[(parent.name, node.name)]
            if d is None:
                added_node[1] = parent.name
                added_node[4] = 0
            else:
                added_node[0] = parent.name
                added_node[3] = float(d)

            if parent.name in waiting_lineages:
                # Second visit: the parent becomes ready.
                waiting_lineages.remove(parent.name)
                ready_lineages.append(parent)
            elif parent.is_admixture():
                # Admixture parents become ready on their first visit.
                ready_lineages.append(parent)
            else:
                waiting_lineages.add(parent.name)
        res_tree[node.name] = added_node

    tree = rename_rootname(res_tree, old_name=root_name, new_name='r')
    # Same output as the former ``print key, ':', val`` statement, written so
    # that it is valid on both Python 2 and Python 3.
    for key, val in tree.items():
        print('%s : %s' % (key, val))
    return insert_children_in_tree(tree)
def read_treemix_file(filename_treeout, filename_vertices, filename_edges, outgroup=None):
    """Build a tree from the three files describing one TreeMix result.

    The first line of ``filename_treeout`` is read as a Newick string and the
    remaining lines are handed to ``parse_admixtures``. The vertices and edges
    files are consumed by ``match_vertices`` and ``get_edge_lengths``. The
    pieces are then combined by ``add_admixtures``.

    Parameters:
        filename_treeout: path of the tree output file; a ``.gz`` suffix
            triggers ``unzip`` first.
        filename_vertices: path of the vertices file (``.gz`` handled alike).
        filename_edges: path of the edges file (``.gz`` handled alike).
        outgroup: if not ``None``, the tree is passed through
            ``rearrange_root(tree, outgroup)`` before being returned.

    Returns:
        The tree returned by ``add_admixtures`` (re-rooted when ``outgroup``
        is given).
    """
    # NOTE(review): the return value was never used (it was bound to an unused
    # local named ``np``). The call is kept in case it has side effects --
    # confirm and remove if it does not.
    new_node_naming_policy()

    if filename_treeout.endswith('.gz'):
        filename_treeout = unzip(filename_treeout)
    if filename_vertices.endswith('.gz'):
        filename_vertices = unzip(filename_vertices)
    if filename_edges.endswith('.gz'):
        filename_edges = unzip(filename_edges)

    with open(filename_treeout, 'r') as f:
        newick_tree = f.readline().rstrip()
        # A real list (not a lazy ``map`` object) so that parse_admixtures
        # receives the same kind of argument on Python 2 and Python 3.
        admixture_lines = [line.rstrip() for line in f.readlines()]
        admixtures = parse_admixtures(admixture_lines)

    # NOTE(review): this result was overwritten by get_edge_lengths() before
    # ever being read. The call is kept only in case it validates the file or
    # has side effects -- confirm and remove if it does not.
    get_edge_lengths2(filename_edges)

    tree, translates = parse_newick_tree(newick_tree)

    # Register every key pair produced by the Newick parser under the two
    # naming schemes 'AdmB' and 'Treemix_N'.
    vd = vertice_dictionary()
    for adm_key, treemix_N_key in translates.items():
        vd.insert_mapping(adm_key, treemix_N_key, 'AdmB', 'Treemix_N')
    vd, adm_vertices = match_vertices(filename_vertices, vd)

    edges = get_edge_lengths(filename_edges)
    tree = insert_children_in_tree(tree)
    tree = add_admixtures(tree, vd, adm_vertices, edges, admixtures)

    if outgroup is not None:
        tree = rearrange_root(tree, outgroup)
    return tree
def identifier_to_tree(identifier, leaves=None, inner_nodes=None, branch_lengths=None, admixture_proportions=None):
    '''
    Transforms an identifier of the form qwert-uio-asdfg-jk into a dictionary tree using the
    generators of leaves, inner_nodes, branch_lengths and admixture_proportions.

    The identifier is split on '-' into levels and each level on '.' into one token per
    currently traced lineage. The tokens are:

        'c'     the lineage coalesces; it is replaced by a newly created inner node.
        'w'     nothing happens to the lineage at this level.
        'a'     the lineage passes through a newly created admixture node, which
                adds one extra traced lineage (the node's second branch).
        <int>   the lineage merges into the node created by the 'c' token at that
                position earlier in the same level, and stops being traced.

    Parameters:
        identifier: the string described above.
        leaves: zero-argument callable returning one leaf key per call. Defaults to
            sorted(get_trivial_nodes(n_leaves)).
        inner_nodes: callable returning a new node key; also called as
            inner_nodes(admixture=True). Defaults to generate_numbered_nodes('n').
        branch_lengths: zero-argument callable returning a branch length.
            Defaults to a constant 1.0.
        admixture_proportions: zero-argument callable returning an admixture
            proportion. Defaults to a constant 0.4.

    Returns:
        The result of insert_children_in_tree on the constructed tree.

    Raises:
        AssertionError: if a level does not have one token per traced lineage.
        KeyError: if an integer token refers to a lineage without a preceding 'c'
            in the same level.
        ValueError: if a token is neither 'c', 'w', 'a' nor an integer.
    '''
    levels = identifier.split('-')
    n_leaves = len(levels[0].split('.'))

    # initiate leaves
    if leaves is None:
        leaf_values = sorted(get_trivial_nodes(n_leaves))
    else:
        leaf_values = [leaves() for _ in range(n_leaves)]
    tree = {leaf: [None] * 5 for leaf in leaf_values}
    # Each traced lineage is (node key, branch index); branch 1 is only used for
    # the second branch of an admixture node.
    trace_lineages = [(leaf, 0) for leaf in leaf_values]

    # initiate generators
    if inner_nodes is None:
        inner_nodes = generate_numbered_nodes('n')
    if branch_lengths is None:
        def _unit_branch_length():
            return 1.0
        branch_lengths = _unit_branch_length
    if admixture_proportions is None:
        def _default_admixture_proportion():
            return 0.4
        admixture_proportions = _default_admixture_proportion

    for level in levels:
        identifier_lineages = level.split('.')
        assert len(trace_lineages)==len(identifier_lineages), 'the number of traced lineages did not match the number of lineages in the identifier '+\
            '\n\n'+'trace_lineages:'+'\n'+str(trace_lineages)+\
            '\n\n'+'identifier_lineages:'+'\n'+str(identifier_lineages)
        parent_index = {}
        indexes_to_be_removed = set()
        for n, identifier_lineage in enumerate(identifier_lineages):
            if identifier_lineage == 'c':
                # there is a coalescence for the n'th lineage, and it should be
                # replaced by a new lineage
                new_key = inner_nodes()
                old_key, old_branch = trace_lineages[n]
                new_branch_length = branch_lengths()
                tree = update_parent_and_branch_length(tree, old_key, old_branch, new_key, new_branch_length)
                tree[new_key] = [None] * 5
                parent_index[n] = new_key
                trace_lineages[n] = (new_key, 0)
            elif identifier_lineage == 'w':
                pass
            elif identifier_lineage == 'a':
                new_key = inner_nodes(admixture=True)
                old_key, old_branch = trace_lineages[n]
                new_branch_length = branch_lengths()
                tree = update_parent_and_branch_length(tree, old_key, old_branch, new_key, new_branch_length)
                new_admixture_proportion = admixture_proportions()
                tree[new_key] = [None, None, new_admixture_proportion, None, None]
                trace_lineages[n] = (new_key, 0)
                # The admixture node has a second branch that must be traced too.
                trace_lineages.append((new_key, 1))
            else:
                # there is a coalescence but this lineage disappears
                try:
                    new_key = parent_index[int(identifier_lineage)]
                except KeyError:
                    # Previously this only printed diagnostics and carried on with
                    # whatever new_key held from an earlier token, wiring the
                    # lineage to the wrong parent. Fail loudly instead.
                    raise KeyError(
                        'lineage %d in level %r refers to lineage %r, which has no '
                        'preceding coalescence in that level (coalescing lineages: %r)'
                        % (n, level, identifier_lineage, sorted(parent_index)))
                old_key, old_branch = trace_lineages[n]
                new_branch_length = branch_lengths()
                tree = update_parent_and_branch_length(tree, old_key, old_branch, new_key, new_branch_length)
                indexes_to_be_removed.add(n)
        # remove lineages
        trace_lineages = [trace_lineage
                          for n, trace_lineage in enumerate(trace_lineages)
                          if n not in indexes_to_be_removed]

    # The node created or referenced last is treated as the root: its own entry
    # is dropped and the key is handed to rename_root.
    del tree[new_key]
    tree = rename_root(tree, new_key)
    return insert_children_in_tree(tree)
break return admixed_populations def get_populations_string(tree, min_w=0.0, keys_to_include=None): return '-'.join(get_populations(tree, min_w, keys_to_include)) if __name__ == "__main__": from tree_plotting import pretty_print, plot_as_directed_graph from Rtree_operations import insert_children_in_tree tree_clean = insert_children_in_tree({ 's1': ['s1s2', None, None, 0.1, None], 's2': ['s1s2', None, None, 0.1, None], 's1s2': ['r', None, None, 0.2, None], 's3': ['r', None, None, 0.2, None] }) tree_one_admixture = insert_children_in_tree({ 's1': ['s1b', None, None, 0.1, None], 's1b': ['s1s2', 's3b', 0.2, 0.1, 0.2], 's2': ['s1s2', None, None, 0.1, None], 's1s2': ['r', None, None, 0.2, None], 's3b': ['r', None, None, 0.2, None], 's3': ['s3b', None, None, 0.2, None] }) tree_two_admixture = insert_children_in_tree({ 's1': ['s1b', None, None, 0.1, None], 's1c': ['s1s2', 's3b', 0.4, 0.05, 0.1],
'a2': ['n7', 'n4', 0.613, 0.173875014, 0.010851242, 'a1', None], 'n4': ['n7', None, None, 0.040479891000000004, None, 's4', 'a2'], 'n7': ['n8', None, None, 0.001569646, None, 'a2', 'n4'] } for a, e in tree.items(): print a print '\t', str(e) add = 0 #tree,add=({'s3': ['n2', None, None, 0.3333333333333333, None, None, None], 's6': ['r', None, None, 0.8333333333333333, None, None, None], 'n1': ['n2', None, None, 0.16666666666666666, None, 's1', 's2'], 'n2': ['n3', None, None, 0.16666666666666666, None, 's3', 'n1'], 'n3': ['n4', None, None, 0.16666666666666666, None, 's4', 'n2'], 'n4': ['r', None, None, 0.16666666666666666, None, 's5', 'n3']}, 0) print pretty_string(tree) plot_as_directed_graph(tree) import sys sys.exit() from Rtree_operations import insert_children_in_tree, create_trivial_tree from Rcatalogue_of_trees import tree_on_the_border2 tree2 = insert_children_in_tree(tree_on_the_border2) trouble2 = { 'a': [ 'n17', 'n18', 0.5, 0.0006670327290825764, 0.04000000000000001, 's2', None ], 'c': [ 'n15', 'r', 0.5, 0.02087163982263861, 0.4814480657456043, 'n18', None ], 'n16': ['n17', None, None, 0.005272434567465561, None, 's4', 's3'], 'n17': ['n18', None, None, 0.013899593800954894, None, 'a', 'n16'], 'n15': ['r', None, None, 0.05969046586907494, None, 'c', 's1'], 's3': ['n16', None, None, 0.07815645814883887, None, None, None], 's2': ['a', None, None, 0.05, None, None, None], 's1': ['n15', None, None, 0.5947563021746359, None, None, None],
} tree_2c = { 's1': ['b', None, None, 0.1, None, None, None], 's2': ['e', None, None, 0.05, None, None, None], 's3': ['e', None, None, 0.3, None, None, None], 's4': ['b', None, None, 0.3, None, None, None], 'a': ['b', 'c', 0.5, 0.2, 0.1, 's4', None], 'c': ['e', 'd', 0.5, 0.1, 0.1, 'a', None], 'b': ['f', None, None, 0.05, None, 's4', 's1'], 'f': ['r', None, None, 0.02, None, 'b', 'e'], 'e': ['f', None, None, 0.05, None, 's2', 's3'], 'd': ['r', None, None, 0.05, None, 's1', 'c'] } a1 = tree_prior(insert_children_in_tree(tree_2e2a)) a2 = tree_prior(tree_3e1a) a3 = tree_prior(tree_4e) a4 = tree_prior(tree_2e2c) a5 = tree_prior(tree_1e1a1c) a6 = tree_prior(tree_2a1c) a7 = tree_prior(tree_2c) n1 = 6 n2 = 4 n3 = 1 n4 = 6 n5 = 12 n6 = 6 n7 = 3 print 'tree_2e2a', a1, n1, a1 * n1