"right_value < ? AND name_class == 'scientific name' AND node_rank LIKE ?", (left_value, right_value, rank.strip())) return [row[0] for row in db_cursor.fetchall()] # for row in db_cursor.fetchall(): # print row # return [] if len(sys.argv) < 4: print("usage: fill_tips_from_taxonomy.py <treefile> <taxonomydb> <outfile>") sys.exit(0) tree = None with open(sys.argv[1],"r") as intree_file: tree = newick3.parse(intree_file.readline()) conn = sqlite3.connect(sys.argv[2]) c = conn.cursor() for tip in tree.leaves(): for child_name in get_child_taxa(tip.label, "genus", c): child = phylo3.Node() child.label = child_name child.istip = True # print child.label tip.add_child(child) # print([t.label for t in tip.children]) with open(sys.argv[3],"w") as outfile: # print(newick3.to_string(tree)+";") outfile.write(newick3.to_string(tree)+";")
all_found_l = False else: any_found_l = True # not sure if having two options here should have any effect... i think they are the same for practical purposes if (all_found_r and not any_found_l): # result["identical_side"] = "R" r_tree_string = "((" + ",".join(result["seq_labels"]["R"]) + "),(" + ",".join(result["seq_labels"]["L"]) + "));" elif (all_found_l and not any_found_r): # result["identical_side"] = "L" r_tree_string = "((" + ",".join(result["seq_labels"]["L"]) + "),(" + ",".join(result["seq_labels"]["R"]) + "));" result_tree = newick3.parse(r_tree_string) # write the result topology to the set of observed topologies for this node topo_file.write(newick3.to_string(result_tree)+";\n") # get the ICA score from phyx pxbp_outfile = "temp_pxbp_out." + node.label pxbp_args = ["pxbp", "-t", topo_file_name, ">", pxbp_outfile] subprocess.call(" ".join(pxbp_args),shell=True) # set default values: if we don't find a score in the pxbp output then this bipart is never observed ica = "-1" freq = "0" with open(pxbp_outfile,"r") as pxbp_result: for line in pxbp_result: parts = line.split("\t") if len(parts) > 1:
# Prune knuckles (nodes with exactly one child) at the root of the tree if
# necessary. The test is "exactly one child" rather than "fewer than two" so
# that a root with no children at all (a single-tip tree) is left alone
# instead of raising IndexError on children[0].
while len(tree.children) == 1:
    only_child = tree.children[0]
    only_child.parent = None
    only_child.isroot = True
    tree = only_child

# cannot edit tree while traversing, so just record knuckles as we go
knuckles = []

# first find the knuckles
for parent in tree.iternodes(phylo3.PREORDER):
    if parent.istip:
        continue

    # fall back to the node's own string form when it carries no label
    node_name = str(parent.label) if parent.label is not None else str(parent)
    logfile.write("node '" + node_name + "' has " + str(len(parent.children)) + " children\n")

    if len(parent.children) == 1:
        logfile.write("\tthis node will be pruned\n")
        knuckles.append(parent)

# now graft them out: attach each knuckle's only child directly to the
# knuckle's parent, then drop the knuckle. The root is never recorded here,
# because the loop above already left it with a child count other than one.
# NOTE(review): the knuckle's own branch length is not added to the child's;
# confirm whether lengths should be summed when a knuckle is removed.
for k in knuckles:
    k.parent.add_child(k.children[0])
    k.parent.remove_child(k)

# print() with one parenthesised argument behaves the same on Python 2 and 3
print(newick3.to_string(tree) + ";")
"error: mrca in master tree must define a biparition, not a multifurcation!" ) # get the bipart *below* the master mrca, since all target taxa are contained by it master_bipart = get_bipart(master_mrca.children[0]) # print("master bipart: " + get_string(master_bipart)) # first check if the current root is already compatible, if so just reiterate original topology # THIS FAILS when there is a polytomy at the root of the input tree # if is_compatible(get_bipart(target), master_bipart): # print(newick3.to_string(target)+";\n") # sys.exit(0) for test_root in target.descendants(): test_bipart = get_bipart(test_root) if is_compatible(test_bipart, master_bipart): # just for debugging. otherwise result with not be valid newick # print("found node in input tree compatible with root node in master: " + get_string(test_bipart)) # print("old topology is: " + newick3.to_string(test_root)) rooted_tree = phylo3.get_tree_rooted_on(test_root) print(newick3.to_string(rooted_tree) + ";\n") sys.exit(0) # if we got here, then there was no bipartition in the input tree compatible with the root bipart in the master # ...assuming there are not bugs in script that might otherwise get here... sys.exit( "error: could not root tree. there may not be a bipartition in the target that is compatible with the root bipart in master!" )
#!/usr/bin/env python
# Read a file of newick trees (one per line) and print each tree back out
# with use_node_labels=False.

if __name__ == '__main__':

    import newick3
    import phylo3
    import sys

    if len(sys.argv) < 2:
        # print() with one parenthesised argument works on Python 2 and 3
        print("usage: print_tip_names <treefile>")
        sys.exit()

    treefname = sys.argv[1]

    # the context manager closes the input file even if parsing fails
    with open(treefname, "r") as treefile:
        for line in treefile:
            tree = newick3.parse(line)
            print(newick3.to_string(tree, use_node_labels=False))
new_node.add_child(nodes.pop()) if make_brlens: new_node.length = random.random() * max_brlen nodes.append(new_node) return nodes[0] if __name__ == "__main__": description = "make a random tree with X tips" parser = argparse.ArgumentParser(description=description) parser.add_argument("-x", "--number-of-tips", type=int, nargs=1, required=False, help="The number of tips. If this is specified, tip labels will be numbers. Either this or a set of input tip labels must be specified.") parser.add_argument("-l", "--tip-labels", nargs='*', required=False, help="A set of tip labels for the random tree. If this is not set, then the number of tips must be specified, and tip labels will be numbers.") parser.add_argument("-m", "--max-branch-length", type=float, nargs=1, required=False, help="The upper bound for creating randomized branch lengths. If not set, branch lengths will be zero. This argument cannot be used in conjuction with the TREE_AGE argument.") args = parser.parse_args() if (args.number_of_tips != None and args.tip_labels != None) or (args.number_of_tips == None and args.tip_labels == None): sys.exit("You must specify either the number of tips or the tip labels, but not both") tip_labels = args.tip_labels if args.tip_labels != None else [str(i) for i in range(1, args.number_of_tips[0]+1)] max_brlen = args.max_branch_length[0] if args.max_branch_length != None else 0 make_brlens = True if max_brlen > 0 else False print(newick3.to_string(nodes[0], use_branch_lengths=make_brlens) + ";")
tree = newick3.parse(line)

# Prune knuckles (nodes with exactly one child) at the root of the tree if
# necessary. The test is "exactly one child" rather than "fewer than two" so
# that a root with no children at all (a single-tip tree) is left alone
# instead of raising IndexError on children[0].
while len(tree.children) == 1:
    only_child = tree.children[0]
    only_child.parent = None
    only_child.isroot = True
    tree = only_child

# cannot edit tree while traversing, so just record knuckles as we go
knuckles = []

# first find the knuckles
for parent in tree.iternodes(phylo3.PREORDER):
    if parent.istip:
        continue

    # fall back to the node's own string form when it carries no label
    node_name = str(parent.label) if parent.label is not None else str(parent)
    logfile.write("node '" + node_name + "' has " + str(len(parent.children)) + " children\n")

    if len(parent.children) == 1:
        logfile.write("\tthis node will be pruned\n")
        knuckles.append(parent)

# now graft them out: attach each knuckle's only child directly to the
# knuckle's parent, then drop the knuckle. The root is never recorded here,
# because the loop above already left it with a child count other than one.
# NOTE(review): the knuckle's own branch length is not added to the child's;
# confirm whether lengths should be summed when a knuckle is removed.
for k in knuckles:
    k.parent.add_child(k.children[0])
    k.parent.remove_child(k)

# print() with one parenthesised argument behaves the same on Python 2 and 3
print(newick3.to_string(tree) + ";")
) parser.add_argument( "-m", "--max-branch-length", type=float, nargs=1, required=False, help= "The upper bound for creating randomized branch lengths. If not set, branch lengths will be zero. This argument cannot be used in conjuction with the TREE_AGE argument." ) args = parser.parse_args() if (args.number_of_tips != None and args.tip_labels != None) or (args.number_of_tips == None and args.tip_labels == None): sys.exit( "You must specify either the number of tips or the tip labels, but not both" ) tip_labels = args.tip_labels if args.tip_labels != None else [ str(i) for i in range(1, args.number_of_tips[0] + 1) ] max_brlen = args.max_branch_length[ 0] if args.max_branch_length != None else 0 make_brlens = True if max_brlen > 0 else False print(newick3.to_string(nodes[0], use_branch_lengths=make_brlens) + ";")
# get one node at random from the tree n = random.sample(list(get_nodes(tree)), 1)[0] # create a new internal node to be the parent of the tip we're about to add new_parent = phylo3.Node() if n.parent == None: # root case: new parent becomes the new root of the tree tree = new_parent else: # non-root case: randomly place new parent on the branch leading to n p = n.parent p.remove_child(n) p.add_child(new_parent) # reattach n to the new parent new_parent.add_child(n) # update branch lengths new_parent.length = random.uniform(min_branch_length, n.length - min_branch_length) n.length = n.length - new_parent.length # add the new tip as a child of the new parent new_tip = phylo3.Node() new_tip.label = name new_tip.istip = True new_tip.length = new_parent.depth - new_parent.length new_parent.add_child(new_tip) print(newick3.to_string(tree))
#!/usr/bin/env python
# Print the subtree rooted at the most recent common ancestor of two named
# taxa, taken from the first line of a newick tree file.

import sys
import newick3
import phylo3

# Three positional arguments are required, so argv must hold at least four
# entries (script name + 3). The previous "< 3" check let a two-argument call
# through and crashed with IndexError on sys.argv[3] instead of showing usage.
if len(sys.argv) < 4:
    sys.exit("usage: extract_subtree.py <treefile> <mrca1> <mrca2>")

tree_file_name = sys.argv[1]
mrca1 = sys.argv[2].strip()
mrca2 = sys.argv[3].strip()

# only the first line of the file is parsed as the tree
with open(tree_file_name) as treefile:
    tree = newick3.parse(treefile.readline())

# print() with one parenthesised argument behaves the same on Python 2 and 3
print(newick3.to_string(phylo3.get_mrca(tree, [mrca1, mrca2])))
def validate_tree_type(t):
    """Map a tree-type name from the command line to its generator function.

    Used as the ``type=`` callable of the ``--tree-generator-method`` option,
    so the parsed argument value is the generator function itself.

    t -- the tree type name; must be 'pectinate' or 'balanced'.

    Returns get_pectinate_tree or get_balanced_tree respectively. For any
    other value the program exits with the message 'invalid tree type: <t>'.
    """
    if t == 'pectinate':
        return get_pectinate_tree
    if t == 'balanced':
        return get_balanced_tree
    sys.exit('invalid tree type: ' + t)


if __name__ == '__main__':

    parser = argparse.ArgumentParser('')
    parser.add_argument('-n', '--number-of-tips', type=int, required=True,
        help='The number of tips to be in the final tree')
    parser.add_argument('-b', '--mean-branch-length', type=float, required=True,
        help='The length to be used for internal branches')
    parser.add_argument('-t', '--tree-generator-method', type=validate_tree_type, required=True,
        help='The type of tree to generate. Must be either "pectinate" or "balanced".')

    args = parser.parse_args()

    # Make room for deep recursion on large trees (presumably the generators
    # and/or the newick writer recurse per tip -- confirm), but never LOWER
    # the interpreter's limit: n + 100 is below the current limit for small n
    # and is rejected outright by setrecursionlimit when it is less than 1.
    sys.setrecursionlimit(max(sys.getrecursionlimit(), args.number_of_tips + 100))

    # currently just supports constant rate
    t = args.tree_generator_method(args.number_of_tips, lambda x: x, args.mean_branch_length)

    print(newick3.to_string(t) + ';')
# not sure if having two options here should have any effect... i think they are the same for practical purposes if (all_found_r and not any_found_l): # result["identical_side"] = "R" r_tree_string = "((" + ",".join( result["seq_labels"]["R"]) + "),(" + ",".join( result["seq_labels"]["L"]) + "));" elif (all_found_l and not any_found_r): # result["identical_side"] = "L" r_tree_string = "((" + ",".join( result["seq_labels"]["L"]) + "),(" + ",".join( result["seq_labels"]["R"]) + "));" result_tree = newick3.parse(r_tree_string) # write the result topology to the set of observed topologies for this node topo_file.write(newick3.to_string(result_tree) + ";\n") # get the ICA score from phyx pxbp_outfile = "temp_pxbp_out." + node.label pxbp_args = ["pxbp", "-t", topo_file_name, ">", pxbp_outfile] subprocess.call(" ".join(pxbp_args), shell=True) # set default values: if we don't find a score in the pxbp output then this bipart is never observed ica = "-1" freq = "0" with open(pxbp_outfile, "r") as pxbp_result: for line in pxbp_result: parts = line.split("\t") if len(parts) > 1: