def create_tree(filepath='bird_phylogenic_tree.nex', num_trees=1):
    """Build one leaf-keyed dictionary per tree read from a NEXUS file.

    Args:
        filepath: Path of the NEXUS file passed to ``TreeList.get``.
        num_trees: Number of trees to process, counted from the start of the
            file. ``-1`` selects every tree in the file.

    Returns:
        A list holding one dictionary per processed tree. For every leaf that
        is a direct child of an internal node, the dictionary maps
        ``convert_name(str(leaf.taxon))`` to the value returned by
        ``create_inner_map(leaf, other_leaves)``, where ``other_leaves`` are
        the remaining leaves below that internal node.
    """
    all_trees = TreeList.get(path=filepath, schema="nexus")
    tree_count = len(all_trees) if num_trees == -1 else num_trees

    maps = []
    for index in range(tree_count):
        current_tree = all_trees[index]
        leaf_maps = {}

        # Internal nodes only, visited in descending age order.
        internal_nodes = current_tree.ageorder_node_iter(
            include_leaves=False, descending=True)
        for parent in internal_nodes:
            for child in parent.child_node_iter():
                if not child.is_leaf():
                    continue
                # Every leaf under the parent except the child itself.
                other_leaves = parent.leaf_nodes()
                other_leaves.remove(child)
                inner = create_inner_map(child, other_leaves)
                leaf_key = convert_name(str(child.taxon))
                leaf_maps[leaf_key] = inner

        maps.append(leaf_maps)
    return maps
def get_bs_trees(self, bin_name):
    """Load the RAxML bootstrap trees stored for one supergene bin.

    Args:
        bin_name: Name of the bin directory under ``<self.path>/supergenes``.

    Returns:
        The ``TreeList`` parsed from that bin's ``RAxML_bootstrap.bootstrap``
        file (newick, underscores preserved).
    """
    bootstrap_file = os.path.join(
        self.path, 'supergenes', bin_name, 'RAxML_bootstrap.bootstrap')
    bootstrap_trees = TreeList.get(
        path=bootstrap_file,
        preserve_underscores=True,
        schema='newick',
    )
    # tree_upper is applied to the first tree only.
    # NOTE(review): presumably sufficient because the trees share taxa - confirm.
    tree_upper(bootstrap_trees[0])
    return bootstrap_trees
import argparse
import re
import sys
from dendropy import TreeList

# Report, for the first tree of each newick file given on the command line,
# which internal-node labels occur more than once.
parser = argparse.ArgumentParser(
    description='Check which nodes have duplicated names')
parser.add_argument(
    'treefile',
    type=argparse.FileType('r'),
    nargs='+',
    help='Any number of newick-format tree files')
args = parser.parse_args()

for tree_handle in args.treefile:
    first_tree = TreeList.get(
        file=tree_handle, schema='newick', preserve_underscores=True)[0]

    # Tally how often each non-empty internal-node label appears.
    label_counts = {}
    for internal in first_tree.preorder_internal_node_iter():
        if not internal.label:
            continue
        label_counts[internal.label] = label_counts.get(internal.label, 0) + 1

    # Sum the occurrences of every label seen more than once.
    duplicate_total = 0
    for label, occurrences in label_counts.items():
        if occurrences <= 1:
            continue
        print("Node name '{}' duplicated {} times".format(label, occurrences))
        duplicate_total += occurrences
    print("Total dups for {}: {}".format(tree_handle.name, duplicate_total))
def main():
    """Run the TreeShrink command-line workflow.

    Steps, in order:

    1. Parse the command line.
    2. Collect the input trees: either one file (``-t``), or one tree file per
       sub-directory of ``-i`` concatenated into a temporary file.
    3. For every gene tree, solve the k-shrink problem with ``TreeFilter`` and
       record, per removed species, the largest diameter ratio it caused.
    4. Turn those ratios into removal sets, one per quantile, using the mode
       chosen by ``-m`` ('per-gene', 'per-species', 'all-genes'; 'auto'
       resolves to 'all-genes' or 'per-species'). Thresholds come from
       external R scripts run through ``Rscript``.
    5. Prune the trees (and, in directory mode, the matching alignments) and
       write the results to the output directory.

    Fixes relative to the previous revision:

    * The tree file name falls back to ``input.tre`` everywhere, as the ``-t``
      help text states. Previously ``splitext(args["tree"])`` raised
      ``TypeError`` when ``-i`` was given without ``-t``, and the
      single-file mode received ``None`` as the input path.
    * Per-gene tree files are read inside a ``with`` block so their handles
      are closed.
    * Directory mode is decided once by truthiness of ``-i``, so an empty
      string no longer reaches code that needs ``subdirs``.
    """
    print("Launching " + treeshrink.PROGRAM_NAME + " version " + treeshrink.PROGRAM_VERSION)

    parser = argparse.ArgumentParser()
    parser.add_argument("-i","--indir",required=False,help="The parent input directory where the trees (and alignments) can be found")
    parser.add_argument("-t","--tree",required=False,help="The name of the input tree/trees. If the input directory is specified (see -i option), each subdirectory under it must contain a tree with this name. Otherwise, all the trees can be included in this one file. Default: input.tre")
    parser.add_argument("-a","--alignment",required=False,help="The name of the input alignment; can only be used when the input directory is specified (see -i option). Each subdirectory under it must contain an alignment with this name. Default: input.fasta")
    parser.add_argument("-c","--centroid",required=False,action='store_true',help="Do centroid reroot in preprocessing. Highly recommended for large trees. Default: NO")
    parser.add_argument("-k","--k",required=False,help="The maximum number of leaves that can be removed. Default: auto-select based on the data; see also -s")
    parser.add_argument("-s","--kscaling",required=False,help="If -k not given, we use k=min(n/a,b*sqrt(n)) by default; using this option, you can set the a,b constants; Default: '5,2'")
    parser.add_argument("-q","--quantiles",required=False,help="The quantile(s) to set threshold. Default is 0.05")
    parser.add_argument("-b","--minimpact",required=False,help="Do not remove species on the per-species test if their impact on diameter is less than MINIPACT%% where x is the given value. Default: 5")
    parser.add_argument("-m","--mode",required=False,help="Filtering mode: 'per-species', 'per-gene', 'all-genes','auto'. Default: auto")
    parser.add_argument("-o","--outdir",required=False,help="Output directory. Default: the same as input directory (if it is specified) or the same as the input trees")
    parser.add_argument("-p","--tempdir",required=False,help="Directory to keep temporary files. If specified, the temp files will be kept")
    parser.add_argument("-r","--libdir",required=False,help="Directory of the R libraries and scripts. Default: 2 layers above the treeshrink package")

    args = vars(parser.parse_args())

    # 'auto' mode falls back to 'all-genes' below these limits.
    MIN_OCC = 20
    MIN_TREE_NUM = 20

    libdir = args["libdir"] if args["libdir"] else dirname(dirname(realpath(treeshrink.__file__)))
    tempdir = set_tmp_dir(args["tempdir"])
    # Quantiles stay strings: they are passed to Rscript and used in file names.
    quantiles = args["quantiles"].split() if args["quantiles"] else ["0.05"]
    minimpact = (float(args["minimpact"]) / 100) + 1 if args["minimpact"] else 1.05
    scaling = [int(x) for x in args["kscaling"].split(",")] if args["kscaling"] else [5, 2]

    # Single source of truth for the tree file name and for directory mode.
    tree_filename = args["tree"] if args["tree"] else "input.tre"
    use_indir = bool(args["indir"])
    subdirs = []

    if use_indir:
        # Only sub-directories that actually contain the tree file are used.
        subdirs = [d for d in listdir(args["indir"])
                   if exists(normpath(join(args["indir"], d, tree_filename)))]
        # Concatenate the per-gene trees into one temporary multi-tree file.
        intrees = get_tmp_file(splitext(tree_filename)[0] + ".trees")
        with open(intrees, 'w') as fout:
            for d in subdirs:
                treefile = normpath(join(args["indir"], d, tree_filename))
                if exists(treefile):
                    with open(treefile, 'r') as fin:
                        fout.write(fin.read())
    else:
        intrees = tree_filename

    mode = args["mode"] if args["mode"] else 'auto'
    k = int(args["k"]) if args["k"] else None

    if args["outdir"]:
        outdir = args["outdir"]
        check_dir(outdir)
    elif use_indir:
        outdir = args["indir"]
    else:
        outdir = splitext(intrees)[0] + "_treeshrink"
        mkdir(outdir)

    # Check to make sure output can be written: touch the removal-set file of
    # the first quantile in every gene directory before the expensive work.
    if use_indir:
        fName, ext = splitext(basename(intrees))
        for sd in subdirs:
            outfile = normpath(join(outdir, sd, fName + "_shrunk_RS_" + quantiles[0] + ".txt"))
            with open(outfile, 'w') as f:
                pass

    trees = TreeList.get(path=intrees, schema='newick', preserve_underscores=True)
    if mode == 'auto' and len(trees) < MIN_TREE_NUM:
        print("There are only " + str(len(trees)) + " gene trees in the dataset.")
        print("TreeShrink will run in 'All-genes' mode")
        mode = 'all-genes'

    gene_list = [[] for i in range(len(trees))]      # per tree: (species, ratio) pairs
    species_map = {}                                 # species -> list of ratios
    occ = {}                                         # species -> number of trees containing it
    # removing_sets[quantile index][tree index] -> species to remove
    removing_sets = [[[] for i in range(len(trees))] for j in range(len(quantiles))]

    for t, a_tree in enumerate(trees):
        # solve k-shrink
        a_filter = TreeFilter(ddpTree=a_tree, centroid_reroot=args["centroid"], scaling=scaling)
        a_filter.optFilter(d=k)

        # compute species feature (i.e. the max ratio associated with each species for this gene tree)
        mapping = {}
        for i in range(1, len(a_filter.min_diams)):
            if a_filter.min_diams[i] == 0:
                # The ratio below would divide by zero; stop for this tree.
                print("Warning: tree %d has no diameter (has only zero branch lengths) after removing %d sequences." %(t+1,i))
                break
            r = a_filter.min_diams[i-1] / a_filter.min_diams[i]
            removals = a_filter.list_removals(d=i)
            for s in removals:
                mapping[s] = r if s not in mapping else max(mapping[s], r)

        # gather per-species distributions and per-gene species features
        for s in mapping:
            if mode == 'per-species' or mode == 'auto':
                species_map[s] = [mapping[s]] if s not in species_map else species_map[s] + [mapping[s]]
            if mode == 'per-species' or mode == 'all-genes' or mode == 'auto':
                gene_list[t].append((s, mapping[s]))

        # fit kernel density to this gene's species features (per-gene mode)
        if mode == 'per-gene':
            filename = get_tmp_file("gene_%s.dat" %str(t))
            with open(filename, 'w') as f:
                for s in mapping:
                    f.write(str(mapping[s]))
                    f.write("\n")
            if len(mapping) > 1:
                for i, q in enumerate(quantiles):
                    # The slice drops a fixed-width prefix of the Rscript output
                    # (presumably R's "[1] " marker - TODO confirm).
                    threshold = float(check_output(["Rscript",normpath(join(libdir,"R_scripts","find_threshold_loglnorm.R")),filename,q]).lstrip().rstrip()[4:])
                    for s in mapping:
                        if mapping[s] > threshold:
                            removing_sets[i][t].append(s)

        # update taxon occupancy (only for per-species mode)
        if mode == 'per-species' or mode == 'auto':
            for n in a_tree.leaf_node_iter():
                s = n.taxon.label
                occ[s] = 1 if s not in occ else occ[s] + 1

    if mode == 'auto' or mode == 'per-species':
        flag = False  # set when at least one species is in fewer than MIN_OCC trees
        for s in occ:
            if occ[s] < MIN_OCC:
                print ("Species " + s + " only exists in " + str(occ[s]) + " gene trees")
                flag = True
        if flag:
            if mode == 'auto':
                mode = 'all-genes'
                print ("There are species with low occupancy in the dataset. TreeShrink will run in 'All-genes' mode")
            else:
                print ("WARNING: 'Per-species' mode was selected for a dataset having low occupancy species. Consider switching to 'All-genes' mode")
        elif mode == 'auto':
            mode = 'per-species'
            print("Finish preprocessing. TreeShrink will run in 'Per-species' mode ... ")

    # fit kernel density to the per-species distributions and compute per-species threshold (per-species mode)
    if mode == 'per-species':
        for s in sorted(species_map):
            # Pad with ratio 1 for the trees where the species was never removed.
            l = len(species_map[s])
            for i in range(occ[s] - l):
                species_map[s].append(1)
            filename = get_tmp_file(s + ".dat")
            with open(filename, 'w') as f:
                for v in species_map[s]:
                    f.write(str(v))
                    f.write("\n")
            thresholds = [0 for i in range(len(quantiles))]
            for i, q in enumerate(quantiles):
                # Never go below the minimum-impact ratio requested with -b.
                thresholds[i] = max(minimpact,float(check_output(["Rscript",normpath(join(libdir,"R_scripts","find_threshold_lkernel.R")),libdir,filename,q]).lstrip().rstrip()[5:]))
                print("%s:\n\t will be cut in %d trees where its impact is above %f for quantile %s" %(s,sum(1 for x in species_map[s] if x>thresholds[i]),thresholds[i],q,))
            species_map[s] = (species_map[s], thresholds)

        for t, gene in enumerate(gene_list):
            for s, r in gene:
                for i, threshold in enumerate(species_map[s][1]):
                    if r > threshold:
                        removing_sets[i][t].append(s)

    # fit kernel density to all the species features across all genes and compute the global threshold (all-gene mode)
    if mode == 'all-genes':
        filename = get_tmp_file("all_genes" + ".dat")
        with open(filename, 'w') as f:
            for gene in gene_list:
                for s, r in gene:
                    f.write(str(r))
                    f.write("\n")
        for i, q in enumerate(quantiles):
            threshold = float(check_output(["Rscript",normpath(join(libdir,"R_scripts","find_threshold_lkernel.R")),libdir,filename,q]).lstrip().rstrip()[5:])
            for t, gene in enumerate(gene_list):
                for s, r in gene:
                    if r > threshold:
                        removing_sets[i][t].append(s)

    print("Writing output ...\n")
    # Dendropy's filter_leaf_nodes() seems to have problem
    # i.e. it produces the trees that the treecmp tool cannot compute the MS distance (need further exploration)
    # use home-made code to prune the tree instead
    fName, ext = splitext(basename(intrees))
    treeName, treeExt = splitext(tree_filename)
    aln_filename = args["alignment"] if args["alignment"] else "input.fasta"
    alnName, alnExt = splitext(aln_filename)

    for i, RS in enumerate(removing_sets):
        # Prune a fresh copy for every quantile so results do not accumulate.
        trees_shrunk = deepcopy(trees)

        if not use_indir:
            # One removal-set file (one line per tree) and one multi-tree file.
            outfile = normpath(join(outdir, fName + "_RS_" + quantiles[i] + ".txt"))
            with open(outfile, 'w') as f:
                for item in RS:
                    for s in item:
                        f.write(s + "\t")
                    f.write("\n")
            for tree, rs in zip(trees_shrunk, RS):
                prune_tree(tree, rs)
            trees_shrunk.write_to_path(normpath(join(outdir, fName + "_" + quantiles[i] + ext)), 'newick')
        else:
            # One removal-set file, tree and (optionally) alignment per gene directory.
            for sd, item in zip(subdirs, RS):
                outfile = normpath(join(outdir, sd, fName + "_shrunk_RS_" + quantiles[i] + ".txt"))
                with open(outfile, 'w') as f:
                    for s in item:
                        f.write(s + "\t")
            for sd, tree, rs in zip(subdirs, trees_shrunk, RS):
                # Leaf labels before pruning, used to validate the alignment.
                L = set(x.taxon.label for x in tree.leaf_node_iter())
                prune_tree(tree, rs)
                treefile = normpath(join(outdir, sd, treeName + "_shrunk_" + quantiles[i] + treeExt))
                tree.write_to_path(treefile, 'newick', unquoted_underscores=True, real_value_format_specifier=".16g")

                input_aln = normpath(join(args["indir"], sd, aln_filename))
                if isfile(input_aln):
                    output_aln = normpath(join(outdir, sd, alnName + "_shrunk" + quantiles[i] + alnExt))
                    alg = CompactAlignment()
                    alg.read_file_object(input_aln, 'fasta')
                    S = set(alg.keys())
                    if L.difference(S) or S.difference(L):
                        print("ERROR: For gene %s, alignment names don't match tree names. Will skip it.\n\tonly in tree:\t%s\n\tonly in alignment:\t%s"%(sd,str(L.difference(S)),str(S.difference(L))))
                    else:
                        alg.remove_all(rs)
                        alg.mask_gapy_sites(1)
                        alg.write(output_aln, 'fasta')

    # Temporary files are kept only when the user named a directory for them.
    if not args["tempdir"]:
        rmtree(tempdir)

    print("Output files written to " + outdir)
def main():
    """Run the single-file TreeShrink command-line workflow.

    Reads the newick trees named by ``-i``, solves the k-shrink problem for
    each tree with ``TreeFilter``, derives removal thresholds with external R
    scripts (run through ``Rscript`` from the ``R_scripts`` directory next to
    this file) according to the filtering mode, prunes the trees and writes,
    per quantile, a removal-set text file and a shrunk tree file into the
    output directory.

    Fixes relative to the previous revision:

    * A tree whose diameter becomes zero no longer raises
      ``ZeroDivisionError``; a warning is printed and the remaining removal
      levels of that tree are skipped.
    * The output base name and extension are computed once instead of twice.
    * Function-local imports that were never used have been dropped.
    """
    import treeshrink
    from treeshrink.optimal_filter_lib import TreeFilter
    from treeshrink.tree_lib import prune_tree
    from subprocess import check_output
    import argparse
    from dendropy import TreeList
    from os.path import basename, dirname, splitext, realpath, join, normpath
    from os import mkdir
    from copy import deepcopy
    from tempfile import mkdtemp
    from shutil import rmtree

    print("Launching " + treeshrink.PROGRAM_NAME + " version " + treeshrink.PROGRAM_VERSION)

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", required=True, help="Input trees")
    parser.add_argument("-d", "--outdir", required=False, help="Output directory. Default: inferred from the input trees")
    parser.add_argument("-t", "--tempdir", required=False, help="Directory to keep temporary files. If specified, the temp files will be kept")
    parser.add_argument("-o", "--output", required=False, help="The name of the output trees. Default: inferred from the input trees")
    parser.add_argument("-c", "--centroid", required=False, action='store_true', help="Do centroid reroot in preprocessing. Highly recommended for large trees. Default: NO")
    parser.add_argument("-k", "--k", required=False, help="The maximum number of leaves that can be removed. Default: auto-select based on the data")
    parser.add_argument("-q", "--quantiles", required=False, help="The quantile(s) to set threshold. Default is 0.05")
    parser.add_argument("-m", "--mode", required=False, help="Filtering mode: 'per-species', 'per-gene', 'all-genes','auto'. Default: auto")

    # The R scripts are looked up relative to this file's directory.
    wdir = dirname(realpath(__file__))

    args = vars(parser.parse_args())

    # 'auto' mode falls back to 'all-genes' below these limits.
    MIN_OCC = 20
    MIN_TREE_NUM = 20

    # Quantiles stay strings: they are passed to Rscript and used in file names.
    quantiles = args["quantiles"].split() if args["quantiles"] else ["0.05"]

    intrees = args["input"]
    treeName, treeExt = splitext(basename(intrees))
    outtrees = args["output"] if args["output"] else treeName + "_shrunk" + treeExt

    mode = args["mode"] if args["mode"] else 'auto'

    k = int(args["k"]) if args["k"] else None

    outdir = args["outdir"] if args["outdir"] else splitext(intrees)[0] + "_treeshrink"
    mkdir(outdir)
    if args["tempdir"]:
        tempdir = args["tempdir"]
        mkdir(tempdir)
    else:
        tempdir = mkdtemp()

    trees = TreeList.get(path=intrees, schema='newick', preserve_underscores=True)

    if mode == 'auto' and len(trees) < MIN_TREE_NUM:
        print("There are only " + str(len(trees)) + " gene trees in the dataset.")
        print("TreeShrink will run in 'All-genes' mode")
        mode = 'all-genes'

    gene_list = [[] for i in range(len(trees))]      # per tree: (species, ratio) pairs
    species_map = {}                                 # species -> list of ratios
    occ = {}                                         # species -> number of trees containing it
    # removing_sets[quantile index][tree index] -> species to remove
    removing_sets = [[[] for i in range(len(trees))] for j in range(len(quantiles))]

    for t, a_tree in enumerate(trees):
        # solve k-shrink
        a_filter = TreeFilter(ddpTree=a_tree, centroid_reroot=args["centroid"])
        a_filter.optFilter(d=k)

        # compute species feature (i.e. the max ratio associated with each species for this gene tree)
        mapping = {}
        for i in range(1, len(a_filter.min_diams)):
            if a_filter.min_diams[i] == 0:
                # The ratio below would divide by zero; stop for this tree.
                print("Warning: tree %d has no diameter (has only zero branch lengths) after removing %d sequences." %(t+1,i))
                break
            r = a_filter.min_diams[i - 1] / a_filter.min_diams[i]
            removals = a_filter.list_removals(d=i)
            for s in removals:
                mapping[s] = r if s not in mapping else max(mapping[s], r)

        # gather per-species distributions and per-gene species features
        for s in mapping:
            if mode == 'per-species' or mode == 'auto':
                species_map[s] = [mapping[s]] if s not in species_map else species_map[s] + [mapping[s]]
            if mode == 'per-species' or mode == 'all-genes' or mode == 'auto':
                gene_list[t].append((s, mapping[s]))

        # fit kernel density to this gene's species features (per-gene mode)
        if mode == 'per-gene':
            filename = normpath(join(tempdir, "gene_" + str(t) + ".dat"))
            with open(filename, 'w') as f:
                for s in mapping:
                    f.write(str(mapping[s]))
                    f.write("\n")
            if len(mapping) > 1:
                for i, q in enumerate(quantiles):
                    # The slice drops a fixed-width prefix of the Rscript output
                    # (presumably R's "[1] " marker - TODO confirm).
                    threshold = float(check_output(["Rscript", normpath(join(wdir, "R_scripts", "find_threshold_loglnorm.R")), filename, q]).lstrip().rstrip()[4:])
                    for s in mapping:
                        if mapping[s] > threshold:
                            removing_sets[i][t].append(s)

        # update taxon occupancy (only for per-species mode)
        if mode == 'per-species' or mode == 'auto':
            for n in a_tree.leaf_node_iter():
                s = n.taxon.label
                occ[s] = 1 if s not in occ else occ[s] + 1

    if mode == 'auto' or mode == 'per-species':
        flag = False  # set when at least one species is in fewer than MIN_OCC trees
        for s in occ:
            if occ[s] < MIN_OCC:
                print("Species " + s + " only exists in " + str(occ[s]) + " gene trees")
                flag = True
        if flag:
            if mode == 'auto':
                mode = 'all-genes'
                print("There are species with low occupancy in the dataset. TreeShrink will run in 'All-genes' mode")
            else:
                print("WARNING: 'Per-species' mode was selected for a dataset having low occupancy species. Consider switching to 'All-genes' mode")
        elif mode == 'auto':
            mode = 'per-species'
            print("Finish preprocessing. TreeShrink will run in 'Per-species' mode")

    # fit kernel density to the per-species distributions and compute per-species threshold (per-species mode)
    if mode == 'per-species':
        for s in species_map:
            # Pad with ratio 1 for the trees where the species was never removed.
            l = len(species_map[s])
            for i in range(occ[s] - l):
                species_map[s].append(1)
            filename = normpath(join(tempdir, s + ".dat"))
            with open(filename, 'w') as f:
                for v in species_map[s]:
                    f.write(str(v))
                    f.write("\n")
            thresholds = [0 for i in range(len(quantiles))]
            for i, q in enumerate(quantiles):
                thresholds[i] = float(check_output(["Rscript", normpath(join(wdir, "R_scripts", "find_threshold_lkernel.R")), wdir, filename, q]).lstrip().rstrip()[5:])
            # Only the value of an existing key is replaced, so iterating the
            # dictionary while doing this is safe.
            species_map[s] = (species_map[s], thresholds)

        for t, gene in enumerate(gene_list):
            for s, r in gene:
                for i, threshold in enumerate(species_map[s][1]):
                    if r > threshold:
                        removing_sets[i][t].append(s)

    # fit kernel density to all the species features across all genes and compute the global threshold (all-gene mode)
    if mode == 'all-genes':
        filename = normpath(join(tempdir, "all_genes" + ".dat"))
        with open(filename, 'w') as f:
            for gene in gene_list:
                for s, r in gene:
                    f.write(str(r))
                    f.write("\n")
        for i, q in enumerate(quantiles):
            threshold = float(check_output(["Rscript", normpath(join(wdir, "R_scripts", "find_threshold_lkernel.R")), wdir, filename, q]).lstrip().rstrip()[5:])
            for t, gene in enumerate(gene_list):
                for s, r in gene:
                    if r > threshold:
                        removing_sets[i][t].append(s)

    # Dendropy's filter_leaf_nodes() seems to have problem
    # i.e. it produces the trees that the treecmp tool cannot compute the MS distance (need further exploration)
    # use home-made code to prune the tree instead
    treeName, treeExt = splitext(outtrees)

    for i, RS in enumerate(removing_sets):
        # Prune a fresh copy for every quantile so results do not accumulate.
        trees_shrunk = deepcopy(trees)
        outfile = normpath(join(outdir, treeName + "_RS_" + quantiles[i] + ".txt"))
        # One line per tree, tab-terminated species names.
        with open(outfile, 'w') as f:
            for item in RS:
                for s in item:
                    f.write(s + "\t")
                f.write("\n")
        for t, tree in enumerate(trees_shrunk):
            prune_tree(tree, RS[t])
        trees_shrunk.write_to_path(normpath(join(outdir, treeName + "_" + quantiles[i] + treeExt)), 'newick')

    # Temporary files are kept only when the user named a directory for them.
    if not args["tempdir"]:
        rmtree(tempdir)

    print("Output files written to " + outdir)
#! /usr/bin/env python
# Read the newick trees from the file named by the first command-line
# argument and hand them to tree_lib.compute_diameter.
from dendropy import TreeList
from sys import argv
from tree_lib import compute_diameter

tree_path = argv[1]
compute_diameter(TreeList.get(path=tree_path, schema="newick"))
def main(args):
    """Check that ARGweaver SMC output survives conversion to a tree sequence.

    The function converts ``args.trees_file`` to an ARGweaver ``.sites``
    file, runs the ARGweaver sampler on it, converts the resulting ``.smc.gz``
    file both directly to nexus and via node/edge text files into a tree
    sequence written as nexus, and finally compares the two nexus tree lists
    tree by tree.

    Args:
        args: Parsed options. The attributes read here are ``outputdir``,
            ``trees_file``, ``ARGweaver_executable_dir``,
            ``ARGweaver_sample_executable``, ``ARGweaver_smc2arg_executable``,
            ``effective_population_size``, ``recombination_rate``,
            ``mutation_rate`` and ``random_seed``.

    Raises:
        AssertionError: If the ``.sites`` file is empty, the expected SMC
            file is missing, or the two tree lists differ in length or labels.
        Exception: If any pair of trees has a non-zero symmetric difference.

    Changes relative to the previous revision: the two bare ``except:``
    clauses are narrowed to ``except Exception:`` (they still re-raise), the
    ``.sites`` file is closed before its size is checked and ARGweaver is
    started, ``msprime`` is imported in this function's scope because it is
    used here as well as in the nested helper, and the unused ``itertools``
    import is gone.
    """
    import os
    import subprocess
    import msprime
    from dendropy import TreeList
    from dendropy.calculate import treecompare
    import ts_extras

    def ts_txts_to_trees(ts_nodes, ts_edges, trees_outname=None):
        """Load node/edge text files, simplify, and optionally dump to disk."""
        import shutil
        logging.info("== Converting new ts ARG to .trees ===")
        try:
            ts = msprime.load_text(nodes=ts_nodes, edges=ts_edges)
        except Exception:
            # Keep copies of the offending inputs for inspection, then re-raise.
            logging.warning(
                "Can't load the texts file properly. Saved copied to 'bad.nodes' & 'bad.edges' for inspection"
            )
            shutil.copyfile(ts_nodes.name, "bad.nodes")
            shutil.copyfile(ts_edges.name, "bad.edges")
            raise
        logging.info("== loaded {}, {}===".format(ts_nodes.name, ts_edges.name))
        try:
            simple_ts = ts.simplify()
        except Exception:
            # Dump the unsimplified tree sequence for inspection, then re-raise.
            ts.dump("bad.trees")
            logging.warning(
                "Can't simplify. .trees file dumped to 'bad.trees'")
            raise
        if trees_outname:
            simple_ts.dump(trees_outname)
        return simple_ts

    # Attach the nexus writer from ts_extras to every TreeSequence.
    msprime.TreeSequence.write_nexus_trees = ts_extras.write_nexus_trees

    iterations = 20
    full_prefix = os.path.join(
        args.outputdir,
        os.path.splitext(os.path.basename(args.trees_file))[0])

    with open(full_prefix + ".sites", "w+") as aw_in:
        tsfile_to_ARGweaver_in(args.trees_file, aw_in)
    # The file is closed (and therefore flushed) from here on.
    sites_path = aw_in.name

    cmd = [
        os.path.join(args.ARGweaver_executable_dir,
                     args.ARGweaver_sample_executable),
        '--sites', sites_path,
        '--popsize', str(args.effective_population_size),
        '--recombrate', str(args.recombination_rate),
        '--mutrate', str(args.mutation_rate),
        '--overwrite',
        '--randseed', str(int(args.random_seed)),
        '--iters', str(iterations),
        '--sample-step', str(iterations),
        '--output', full_prefix
    ]
    assert os.stat(sites_path).st_size > 0, "Initial .sites file is empty"
    logging.debug("running '{}'".format(" ".join(cmd)))
    subprocess.call(cmd)

    #now check that the smc file produced can be converted to nodes
    smc = full_prefix + "." + str(iterations) + ".smc.gz"
    assert os.path.isfile(smc), "No output file names {}".format(smc)

    smc_nex = smc.replace(".smc.gz", ".nex")
    with open(smc_nex, "w+") as smc_nex_out:
        ARGweaver_smc_to_nexus(smc, smc_nex_out)

    arg_nex = smc.replace(".smc.gz", ".ts_nex")
    with open(smc.replace(".smc.gz", ".TSnodes"), "w+") as nodes, \
            open(smc.replace(".smc.gz", ".TSedges"), "w+") as edges, \
            open(arg_nex, "w+") as ts_nex:
        ARGweaver_smc_to_ts_txts(
            os.path.join(args.ARGweaver_executable_dir,
                         args.ARGweaver_smc2arg_executable),
            smc.replace(".smc.gz", ""), nodes, edges)
        ts = ts_txts_to_trees(nodes, edges)
        ts.write_nexus_trees(ts_nex)

    smc_trees = TreeList.get(path=smc_nex, schema="nexus")
    # Share the taxon namespace so the two lists can be compared directly.
    arg_trees = TreeList.get(path=arg_nex, schema="nexus",
                             taxon_namespace=smc_trees[0].taxon_namespace)

    #zero_based_tip_numbers assumed False)
    #Check the smc trees against the ts-imported equivalents
    #NB, the ARGweaver output does not specify where mutations occur on the ARG, so we cannot
    #reconstruct the sequences implied by this ARG for testing purposes, and thus cannot compare
    #the original sequences with the reconstructed ones
    assert len(smc_trees) == len(arg_trees)
    assert [int(float(t.label)) for t in smc_trees] == [int(float(t.label)) for t in arg_trees]
    for i, (smc_tree, arg_tree) in enumerate(zip(smc_trees, arg_trees)):
        if treecompare.symmetric_difference(smc_tree, arg_tree) == 0:
            print(
                "✓ Tree " + str(i + 1) +
                " in AW SMC file is identical to that produced by SMC->ARG->STS"
            )
        else:
            raise Exception("Tree {} differs\n".format(i+1) + \
                    smc_tree.label + " (smc) = " + smc_tree.as_string(schema="newick",
                        suppress_edge_lengths=True,
                        suppress_internal_node_labels = True,
                        suppress_rooting = True) + \
                    arg_tree.label + " (arg) = " + arg_tree.as_string(schema="newick",
                        suppress_edge_lengths=True,
                        suppress_internal_node_labels = True,
                        suppress_rooting = True))
import sys
from warnings import warn
from dendropy import TreeList
from collections import OrderedDict

parser = argparse.ArgumentParser(
    description='Add genus names to nodes on the tree, for each monophyletic genus')
parser.add_argument(
    'treefile',
    type=argparse.FileType('r'),
    help='A newick-format tree')
args = parser.parse_args()

# Only the first tree of the file is used.
trees = TreeList.get(
    file=args.treefile,
    schema='newick',
    preserve_underscores=True,
    rooting='default-rooted')
tree = trees[0]

#compile a list of genus names
# Labels are compared lower-cased and without a trailing "_<digits>_" suffix.
count = {}
for node in tree.preorder_internal_node_iter():
    if not node.label:
        continue
    nl = re.sub(r'_\d+_$', '', node.label).lower()
    count[nl] = count.get(nl, 0) + 1

# Every name seen more than once starts with a counter of zero.
dups = dict.fromkeys((name for name, n in count.items() if n > 1), 0)

#collect a list of genus names
#!/opt/local/bin/python

### Imports ###
import dendropy
from dendropy import TreeList,Tree
import sys
import argparse
from os import walk
import glob

### Main ###

### Argparse
parser = argparse.ArgumentParser(
    description="Reads a newick trees and reroots it with a basal trifurcation",
    prog="strictunroot.py")
parser.add_argument("-i", required=True, type=str, help="Input newick tree name")
parser.add_argument("-o", required=True, type=str, help="Output file name")
args = parser.parse_args()

###Main
# Read every tree as rooted, collapse its basal bifurcation, and collect the
# results in a fresh list that is written without rooting annotations.
input_trees = TreeList.get(
    path=args.i,
    schema="newick",
    rooting="default-rooted",
    preserve_underscores=True)
output_trees = TreeList()
for current in input_trees:
    current.collapse_basal_bifurcation()
    output_trees.append(current)
output_trees.write(
    path=args.o,
    schema="newick",
    unquoted_underscores=True,
    suppress_rooting=True)
print("Done!")
metavar="input")
parser.add_argument("-c", type=str, help="Tree to constrain the search ala RAxML's -g", metavar="constrain")
parser.add_argument("-o", type=str, help="Output file name", metavar="output")
#parser.add_argument("-s",type=int,help="Random number generator seed",metavar="seed")
args = parser.parse_args()

###Random number machinery initialization
# (disabled together with the -s option above)
#if args.s:
#    seed=args.s
#else:
#    seed=random.randint(0,sys.maxint)
#random.seed(seed)
#print("Seed: %d" % seed)

###Input trees
# Gene trees are read as unrooted; the constraint tree uses the reader's
# default rooting.
gene_trees = TreeList.get(path=args.i, schema="newick", rooting="force-unrooted")
constrainTree = Tree.get(path=args.c, schema="newick")
# NOTE(review): constrained_consensus is not defined in this chunk; presumably
# it comes from a customised dendropy - confirm before upgrading the library.
consensus = gene_trees.constrained_consensus(constrainTree=constrainTree, summarize_splits=False, min_freq=0)

#Write the resulting consensus tree (without rooting annotation)
consensus.write(path=args.o, schema="newick", suppress_rooting=True)
print("Done!")
'''Label all unnamed nodes with an underscore + number. '''
import argparse
import re
import sys
from warnings import warn
from dendropy import TreeList
from collections import OrderedDict

parser = argparse.ArgumentParser(
    description='Add genus names to nodes on the tree, for each monophyletic genus')
parser.add_argument(
    'treefile',
    type=argparse.FileType('r'),
    help='A newick-format tree')
args = parser.parse_args()

# Only the first tree of the file is used.
trees = TreeList.get(
    file=args.treefile,
    schema='newick',
    preserve_underscores=True,
    rooting='default-rooted')
tree = trees[0]

#compile a list of genus names
# Labels are compared lower-cased and without a trailing "_<digits>_" suffix.
count = {}
for node in tree.preorder_internal_node_iter():
    if not node.label:
        continue
    nl = re.sub(r'_\d+_$', '', node.label).lower()
    count[nl] = count.get(nl, 0) + 1

# Every name seen more than once starts with a counter of zero.
dups = dict.fromkeys((name for name, n in count.items() if n > 1), 0)

#collect a list of genus names
genera = OrderedDict()
from os import walk
import glob

### Main ###

### Argparse
parser = argparse.ArgumentParser(
    prog="strictunroot.py",
    description="Reads a newick trees and reroots it with a basal trifurcation")
parser.add_argument("-i", type=str, required=True, help="Input newick tree name")
parser.add_argument("-o", type=str, required=True, help="Output file name")
args = parser.parse_args()

###Main
# Read every tree as rooted, collapse its basal bifurcation, and collect the
# results in a fresh list that is written without rooting annotations.
input_trees = TreeList.get(
    path=args.i,
    schema="newick",
    rooting="default-rooted",
    preserve_underscores=True)
output_trees = TreeList()
for current in input_trees:
    current.collapse_basal_bifurcation()
    output_trees.append(current)
output_trees.write(
    path=args.o,
    schema="newick",
    unquoted_underscores=True,
    suppress_rooting=True)
print("Done!")