Ejemplo n.º 1
0
def remove_kink(node, curroot):
    """
    Smooth the kink (a node left with a single child) created by pruning.

    This prevents creating orphaned tips after pruning twice at the same
    node.

    If ``node`` is the current root, the root is repaired instead: a
    single-child root is first replaced by its only child, and the
    resulting root (asserted to have exactly two children) is rerooted
    with ``phylo3.reroot`` on its second child when the first child is a
    tip, otherwise on its first child.

    Otherwise ``node`` is spliced out of the tree: its first child is
    attached to ``node``'s parent and receives the sum of both branch
    lengths.

    Returns a ``(node, curroot)`` tuple. In the root case ``node`` is the
    node that was passed in; in the non-root case it is the child that
    took the kink's place. ``curroot`` is the (possibly new) root.

    Raises AssertionError, with ``newick3.tostring(curroot)`` in the
    message, when the root does not end up with exactly two children.
    """
    if node == curroot:
        # Call form prints the same text under Python 2 and Python 3.
        print("fix bifurcating root by moving the root away to an adjacent none-tip")
        if curroot.nchildren == 1:
            # Drop the single-child root and promote its only child.
            curroot = curroot.children[0]
            curroot.parent = None
        # NOTE: the original spelled this ``newick3.tosting``, which turned a
        # failed assertion into an AttributeError and hid the real message.
        assert curroot.nchildren == 2, \
            "check tree root format: " + newick3.tostring(curroot) + ";"
        if curroot.children[0].istip:  # the other child is not a tip
            curroot = phylo3.reroot(curroot, curroot.children[1])
        else:
            curroot = phylo3.reroot(curroot, curroot.children[0])
    else:
        # ---node---<  every kink has exactly one child at this point
        length = node.length + (node.children[0]).length
        par = node.parent
        kink = node
        node = node.children[0]
        # parent--kink---node<  becomes  parent------node<
        par.remove_child(kink)
        par.add_child(node)
        node.length = length
    return node, curroot
Ejemplo n.º 2
0
def remove_kink(node, curroot):
    """
    Smooth the kink (a node left with a single child) created by pruning,
    to prevent creating orphaned tips after pruning twice at the same node.

    Root case (``node == curroot``): a single-child root is replaced by its
    only child, the root is then asserted to have exactly two children, and
    the tree is rerooted with ``phylo3.reroot`` on the second child if the
    first child is a tip, else on the first child.

    Non-root case: ``node`` is removed from its parent, its first child is
    attached to that parent instead, and the child's branch length becomes
    the sum of the two original branch lengths.

    Returns ``(node, curroot)``: the passed-in node (root case) or the child
    that replaced the kink (non-root case), and the possibly new root.

    Raises AssertionError, with ``newick3.tostring(curroot)`` in the
    message, when the root does not end up with exactly two children.
    """
    if node == curroot:
        # Call form prints the same text under Python 2 and Python 3.
        print("fix bifurcating root by moving the root away to an adjacent none-tip")
        if curroot.nchildren == 1:
            # Promote the only child to be the root.
            curroot = curroot.children[0]
            curroot.parent = None
        # NOTE: previously misspelled ``newick3.tosting``; a failing assertion
        # then raised AttributeError instead of reporting the tree.
        assert curroot.nchildren == 2, \
            "check tree root format: " + newick3.tostring(curroot) + ";"
        if curroot.children[0].istip:  # the other child is not a tip
            curroot = phylo3.reroot(curroot, curroot.children[1])
        else:
            curroot = phylo3.reroot(curroot, curroot.children[0])
    else:
        # ---node---<  every kink has exactly one child at this point
        length = node.length + (node.children[0]).length
        par = node.parent
        kink = node
        node = node.children[0]
        # parent--kink---node<  becomes  parent------node<
        par.remove_child(kink)
        par.add_child(node)
        node.length = length
    return node, curroot
Ejemplo n.º 3
0
def remove_kink(node, curroot):
    """
    Splice a single-child node ("kink") out of the tree.

    When ``node`` is the current root and that root has two children, the
    tree is first rerooted with ``phylo3.reroot``: on the root's second
    child if the first child is a tip, otherwise on the first child.

    ``node`` is then removed from its parent and its first child is
    attached to that parent, carrying the sum of both branch lengths.

    Returns ``(child, curroot)``: the child that replaced ``node`` and the
    (possibly new) root.
    """
    if node == curroot and curroot.nchildren == 2:
        # Move the root away to an adjacent non-tip node.
        first_child = curroot.children[0]
        anchor = curroot.children[1] if first_child.istip else first_child
        curroot = phylo3.reroot(curroot, anchor)

    # parent--kink---survivor<  becomes  parent------survivor<
    kink = node
    merged_length = kink.length + kink.children[0].length
    parent = kink.parent
    survivor = kink.children[0]
    parent.remove_child(kink)
    parent.add_child(survivor)
    survivor.length = merged_length
    return survivor, curroot
Ejemplo n.º 4
0
def remove_kink(node, curroot):
    """
    Remove a node that has been left with a single child and reconnect
    that child directly to the node's parent.

    If ``node`` is the root and the root has two children, the tree is
    rerooted first via ``phylo3.reroot`` (on the second child when the
    first one is a tip, otherwise on the first child).

    The reconnected child gets the combined branch length of the removed
    node and itself. Returns ``(child, curroot)``.
    """
    root_is_bifurcating = node == curroot and curroot.nchildren == 2
    if root_is_bifurcating:
        # Move the root away to an adjacent non-tip node.
        if curroot.children[0].istip:
            new_root = curroot.children[1]
        else:
            new_root = curroot.children[0]
        curroot = phylo3.reroot(curroot, new_root)

    # ---node---<  the node is expected to have exactly one child here
    total_length = node.length + node.children[0].length
    grandparent = node.parent
    only_child = node.children[0]
    grandparent.remove_child(node)
    grandparent.add_child(only_child)
    only_child.length = total_length
    return only_child, curroot
Ejemplo n.º 5
0
def extract_rooted_ingroup_clades(root, ingroups, outgroups, min_ingroup_taxa):
    """
    Input a tree with ingroups and at least one outgroup; output a list of
    rooted ingroup clades.

    Each round scores every node in two directions ("front" names from
    ``get_front_names(node)``, "back" names from
    ``get_back_names(node, root)``). A direction scores the number of
    distinct ingroup names it contains, or -1 as soon as an outgroup name
    is seen. The best-scoring node/direction is cut out as a clade and the
    search repeats on what is left.

    The loop stops when no direction scores at least ``min_ingroup_taxa``,
    when no node scores above zero, or when the remaining tree has three
    or fewer leaves.

    Calls ``sys.exit`` with "Check taxonID <name>" when a name is in
    neither ``ingroups`` nor ``outgroups``.
    """
    def _ingroup_score(names):
        # Number of distinct ingroup names, or -1 once an outgroup shows up.
        score = 0
        for name in set(names):
            if name in outgroups:
                return -1
            elif name in ingroups:
                score += 1
            else:
                sys.exit("Check taxonID " + name)
        return score

    inclades = []
    while True:
        max_score, direction, max_node = 0, "", None
        for node in root.iternodes():
            front = _ingroup_score(get_front_names(node))
            back = _ingroup_score(get_back_names(node, root))
            if front > max_score:
                max_score, direction, max_node = front, "front", node
            if back > max_score:
                max_score, direction, max_node = back, "back", node

        # Stop when nothing qualifies. The ``max_node is None`` test matters
        # when min_ingroup_taxa <= 0: without it a round that found no
        # candidate would change nothing and the loop would never end.
        if max_node is None or max_score < min_ingroup_taxa:
            break

        if direction == "front":
            inclades.append(max_node)
            kink = max_node.prune()
            if len(root.leaves()) <= 3:
                break
            _, root = remove_kink(kink, root)
        elif direction == "back":
            par = max_node.parent
            par.remove_child(max_node)
            max_node.prune()
            # Flip direction: the clade is what remains, rerooted at par.
            inclades.append(phylo3.reroot(root, par))
            if len(max_node.leaves()) <= 3:
                break
            # Continue the search on the detached subtree.
            max_node, root = remove_kink(max_node, max_node)
    return inclades
def remove_kink(node, curroot):
    """
    Smooth the kink created by pruning, to prevent creating orphaned tips
    after pruning twice at the same node.

    A root with two children is first moved (``phylo3.reroot``) onto its
    second child when the first child is a tip, otherwise onto its first
    child. Then ``node`` is dropped from its parent and replaced there by
    its first child, whose branch length becomes the sum of both lengths.

    Returns ``(child, curroot)``: the replacing child and the current root.
    """
    if node == curroot and curroot.nchildren == 2:
        # Move the root away to an adjacent non-tip node.
        leading = curroot.children[0]
        target = curroot.children[1] if leading.istip else leading
        curroot = phylo3.reroot(curroot, target)

    # ---kink---<  the kink is expected to have exactly one child here
    kink = node
    joined_length = kink.length + kink.children[0].length
    upstream = kink.parent
    downstream = kink.children[0]
    # upstream--kink---downstream<  becomes  upstream------downstream<
    upstream.remove_child(kink)
    upstream.add_child(downstream)
    downstream.length = joined_length
    return downstream, curroot
Ejemplo n.º 7
0
def cut_long_branches(curroot, cutoff):
    """
    Cut every branch longer than ``cutoff`` and return the pieces.

    A two-child root is first rerooted with ``phylo3.reroot`` onto an
    adjacent non-tip node. The tree is then scanned repeatedly: the first
    non-root node whose ``length`` exceeds ``cutoff`` is recorded and
    pruned, and the scan restarts as long as the remaining tree still has
    at least four leaves (in which case the kink left by pruning is
    removed with ``remove_kink``).

    Returns the list of cut subtrees followed by the residual tree.
    """
    pieces = []  # all subtrees produced by cutting
    if curroot.nchildren == 2:  # fix the root
        first_child = curroot.children[0]
        # If the first child is a tip, the other one is used instead
        # (the original comment assumes a tree with >= 4 leaves).
        anchor = curroot.children[1] if first_child.istip else first_child
        curroot = phylo3.reroot(curroot, anchor)

    found_long_branch = True
    while found_long_branch:
        # Only rescan if the last pass cut something and the tree is still big.
        found_long_branch = False
        for branch in curroot.iternodes():
            if not (branch != curroot and branch.length > cutoff):
                continue
            pieces.append(branch)
            stub = branch.prune()
            if len(curroot.leaves()) >= 4:
                stub, curroot = remove_kink(stub, curroot)
                found_long_branch = True
            break  # the tree changed; restart the traversal
    pieces.append(curroot)  # the residue after cutting
    return pieces
Ejemplo n.º 8
0
def cut_long_branches(curroot, cutoff):
    """
    Split a tree at every branch whose length exceeds ``cutoff``.

    Before cutting, a root with two children is moved (``phylo3.reroot``)
    onto its second child if the first child is a tip, else onto its first
    child. Each pass walks ``curroot.iternodes()`` and cuts the first
    non-root node longer than ``cutoff``; another pass runs only if the
    remaining tree keeps at least four leaves, after ``remove_kink`` has
    cleaned up the pruning point.

    Returns a list: the cut subtrees in the order found, then the residue.
    """
    subtrees = []
    if curroot.nchildren == 2:
        # Move the root away to an adjacent non-tip internal node.
        if curroot.children[0].istip:
            new_root = curroot.children[1]
        else:
            new_root = curroot.children[0]
        curroot = phylo3.reroot(curroot, new_root)

    keep_scanning = True
    while keep_scanning:
        keep_scanning = False
        for candidate in curroot.iternodes():
            is_long_branch = candidate != curroot and candidate.length > cutoff
            if is_long_branch:
                subtrees.append(candidate)
                leftover = candidate.prune()
                if len(curroot.leaves()) >= 4:
                    leftover, curroot = remove_kink(leftover, curroot)
                    keep_scanning = True
                break  # restart the walk on the modified tree
    subtrees.append(curroot)  # write out the residue after cutting
    return subtrees
Ejemplo n.º 9
0
def reroot_with_monophyletic_outgroups(root):
    """
    Reroot the tree on its outgroup, if the outgroup is monophyletic.

    Leaves whose ``get_name(label)`` is in ``OUTGROUPS`` are the outgroup
    tips. With exactly one outgroup tip the tree is rerooted on that tip's
    parent. Otherwise every non-root node is checked for a split that has
    only outgroup names on one side and only ingroup names on the other,
    and the tree is rerooted at the first such split.

    Returns the result of ``phylo3.reroot``, or None when no such split
    exists.
    """
    def _tally(names):
        # Count (ingroup, outgroup) names in one direction.
        inside, outside = 0, 0
        for taxon in names:
            if taxon in OUTGROUPS:
                outside += 1
            else:
                inside += 1
        return inside, outside

    outgroup_tips = [tip for tip in root.leaves()
                     if get_name(tip.label) in OUTGROUPS]

    if len(outgroup_tips) == 1:  # one single outgroup
        # Cannot reroot on a tip, so go one node further into the ingroup.
        return phylo3.reroot(root, outgroup_tips[0].parent)

    # Zero or several outgroups: check monophyly and reroot.
    newroot = None
    for node in root.iternodes():
        if node == root:
            continue  # skip the root
        front_names = get_front_names(node)
        back_names = get_back_names(node, root)
        front_in, front_out = _tally(front_names)
        back_in, back_out = _tally(back_names)
        if front_in == 0 and front_out > 0 and back_in > 0 and back_out == 0:
            newroot = node  # ingroup at back, outgroup in front
            break
        if front_in > 0 and front_out == 0 and back_in == 0 and back_out > 0:
            newroot = node.parent  # ingroup in front, outgroup at back
            break

    if newroot != None:
        return phylo3.reroot(root, newroot)
    return None
Ejemplo n.º 10
0
def extract_rooted_ingroup_clades(root, ingroups, outgroups, min_ingroup_taxa):
    """
    Input a tree with ingroups and at least one outgroup; output a list of
    rooted ingroup clades.

    Every node is scored in its "front" direction
    (``get_front_names(node)``) and its "back" direction
    (``get_back_names(node, root)``): the score is the number of distinct
    ingroup names, or -1 if any outgroup name is present. The node and
    direction with the highest score are extracted as a clade, and the
    search is repeated on the remaining tree.

    Extraction ends when the best score is below ``min_ingroup_taxa``, when
    no node scores above zero, or when three or fewer leaves remain.

    Calls ``sys.exit`` with "Check taxonID <name>" for a name that is in
    neither ``ingroups`` nor ``outgroups``.
    """
    def _count_ingroups(names):
        # Distinct ingroup names in this direction; -1 if an outgroup is hit.
        total = 0
        for name in set(names):
            if name in outgroups:
                return -1
            elif name in ingroups:
                total += 1
            else:
                sys.exit("Check taxonID " + name)
        return total

    inclades = []
    while True:
        max_score, direction, max_node = 0, "", None
        for node in root.iternodes():
            front = _count_ingroups(get_front_names(node))
            back = _count_ingroups(get_back_names(node, root))
            if front > max_score:
                max_score, direction, max_node = front, "front", node
            if back > max_score:
                max_score, direction, max_node = back, "back", node

        # No candidate at all must end the loop: with min_ingroup_taxa <= 0
        # the score test alone would pass, nothing would be cut, and the
        # same round would repeat forever.
        if max_node is None or max_score < min_ingroup_taxa:
            break

        if direction == "front":
            inclades.append(max_node)
            kink = max_node.prune()
            if len(root.leaves()) <= 3:
                break
            _, root = remove_kink(kink, root)
        elif direction == "back":
            par = max_node.parent
            par.remove_child(max_node)
            max_node.prune()
            # Flip direction: keep what remains, rerooted at par.
            inclades.append(phylo3.reroot(root, par))
            if len(max_node.leaves()) <= 3:
                break
            # Carry on with the detached subtree as the working tree.
            max_node, root = remove_kink(max_node, max_node)
    return inclades
Ejemplo n.º 11
0
    import phylo3, newick3
    import sys

    # Usage: roottrees <treesfile> <outgroupsfile>
    # Reads one newick tree per line, reroots each tree on the MRCA of the
    # listed outgroup names, and writes the results next to the input file.
    if len(sys.argv) < 3:
        # Call form prints the same text under Python 2 and Python 3.
        print(__doc__)
        print("usage: roottrees <treesfile> <outgroupsfile>")
        sys.exit(0)

    treesfname = sys.argv[1]
    outgroupsfname = sys.argv[2]

    rooted_trees = []
    # ``with`` closes both inputs even if parsing or rerooting raises.
    with open(treesfname, "r") as treesfile, \
            open(outgroupsfname, "r") as outgroupsfile:
        # One outgroup name per line.
        outgroup_names = [line.strip() for line in outgroupsfile]

        for line in treesfile:
            tree = newick3.parse(line)
            outgroup = phylo3.getMRCA(tree, outgroup_names)
            rooted_trees.append(phylo3.reroot(tree, outgroup))

    # "<name>.tre" -> "<name>.rooted.tre"; closing the file flushes the output.
    with open(treesfname.rsplit(".tre", 1)[0] + ".rooted.tre", "w") as outfile:
        for tree in rooted_trees:
            outfile.write(newick3.to_string(tree) + ";\n")